AArch64ISelLowering.cpp (LLVM 12.0.1)
1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64ISelLowering.h"
15 #include "AArch64ExpandImm.h"
17 #include "AArch64PerfectShuffle.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/ArrayRef.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SmallSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/ADT/Triple.h"
31 #include "llvm/ADT/Twine.h"
47 #include "llvm/IR/Attributes.h"
48 #include "llvm/IR/Constants.h"
49 #include "llvm/IR/DataLayout.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
54 #include "llvm/IR/GlobalValue.h"
55 #include "llvm/IR/IRBuilder.h"
56 #include "llvm/IR/Instruction.h"
57 #include "llvm/IR/Instructions.h"
58 #include "llvm/IR/IntrinsicInst.h"
59 #include "llvm/IR/Intrinsics.h"
60 #include "llvm/IR/IntrinsicsAArch64.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/OperandTraits.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/Type.h"
65 #include "llvm/IR/Use.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/MC/MCRegisterInfo.h"
68 #include "llvm/Support/Casting.h"
69 #include "llvm/Support/CodeGen.h"
71 #include "llvm/Support/Compiler.h"
72 #include "llvm/Support/Debug.h"
74 #include "llvm/Support/KnownBits.h"
80 #include <algorithm>
81 #include <bitset>
82 #include <cassert>
83 #include <cctype>
84 #include <cstdint>
85 #include <cstdlib>
86 #include <iterator>
87 #include <limits>
88 #include <tuple>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 using namespace llvm::PatternMatch;
94 
95 #define DEBUG_TYPE "aarch64-lower"
96 
97 STATISTIC(NumTailCalls, "Number of tail calls");
98 STATISTIC(NumShiftInserts, "Number of vector shift inserts");
99 STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
100 
101 // FIXME: The necessary dtprel relocations don't seem to be supported
102 // well in the GNU bfd and gold linkers at the moment. Therefore, by
103 // default, for now, fall back to GeneralDynamic code generation.
105  "aarch64-elf-ldtls-generation", cl::Hidden,
106  cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
107  cl::init(false));
108 
109 static cl::opt<bool>
110 EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
111  cl::desc("Enable AArch64 logical imm instruction "
112  "optimization"),
113  cl::init(true));
114 
115 // Temporary option added for the purpose of testing functionality added
116 // to DAGCombiner.cpp in D92230. It is expected that this can be removed
117 // in the future once both implementations are based on MGATHER rather
118 // than the GLD1 nodes added for the SVE gather load intrinsics.
119 static cl::opt<bool>
120 EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
121  cl::desc("Combine extends of AArch64 masked "
122  "gather intrinsics"),
123  cl::init(true));
124 
125 /// Value type used for condition codes.
126 static const MVT MVT_CC = MVT::i32;
127 
128 static inline EVT getPackedSVEVectorVT(EVT VT) {
129  switch (VT.getSimpleVT().SimpleTy) {
130  default:
131  llvm_unreachable("unexpected element type for vector");
132  case MVT::i8:
133  return MVT::nxv16i8;
134  case MVT::i16:
135  return MVT::nxv8i16;
136  case MVT::i32:
137  return MVT::nxv4i32;
138  case MVT::i64:
139  return MVT::nxv2i64;
140  case MVT::f16:
141  return MVT::nxv8f16;
142  case MVT::f32:
143  return MVT::nxv4f32;
144  case MVT::f64:
145  return MVT::nxv2f64;
146  case MVT::bf16:
147  return MVT::nxv8bf16;
148  }
149 }
150 
151 // NOTE: Currently there's only a need to return integer vector types. If this
152 // changes then just add an extra "type" parameter.
153 static inline EVT getPackedSVEVectorVT(ElementCount EC) {
154  switch (EC.getKnownMinValue()) {
155  default:
156  llvm_unreachable("unexpected element count for vector");
157  case 16:
158  return MVT::nxv16i8;
159  case 8:
160  return MVT::nxv8i16;
161  case 4:
162  return MVT::nxv4i32;
163  case 2:
164  return MVT::nxv2i64;
165  }
166 }
167 
168 static inline EVT getPromotedVTForPredicate(EVT VT) {
170  "Expected scalable predicate vector type!");
171  switch (VT.getVectorMinNumElements()) {
172  default:
173  llvm_unreachable("unexpected element count for vector");
174  case 2:
175  return MVT::nxv2i64;
176  case 4:
177  return MVT::nxv4i32;
178  case 8:
179  return MVT::nxv8i16;
180  case 16:
181  return MVT::nxv16i8;
182  }
183 }
184 
185 /// Returns true if VT's elements occupy the lowest bit positions of its
186 /// associated register class without any intervening space.
187 ///
188 /// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
189 /// same register class, but only nxv8f16 can be treated as a packed vector.
190 static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
191  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
192  "Expected legal vector type!");
193  return VT.isFixedLengthVector() ||
194  VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
195 }
196 
197 // Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
198 // predicate and end with a passthru value matching the result type.
199 static bool isMergePassthruOpcode(unsigned Opc) {
200  switch (Opc) {
201  default:
202  return false;
229  return true;
230  }
231 }
232 
233 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
234  const AArch64Subtarget &STI)
235  : TargetLowering(TM), Subtarget(&STI) {
236  // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
237  // we have to make something up. Arbitrarily, choose ZeroOrOne.
238  setBooleanContents(ZeroOrOneBooleanContent);
239  // When comparing vectors the result sets the different elements in the
240  // vector to all-one or all-zero.
241  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
242 
243  // Set up the register classes.
244  addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
245  addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
246 
247  if (Subtarget->hasFPARMv8()) {
248  addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
249  addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
250  addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
251  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
252  addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
253  }
254 
255  if (Subtarget->hasNEON()) {
256  addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
257  addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
258  // Someone set us up the NEON.
259  addDRTypeForNEON(MVT::v2f32);
260  addDRTypeForNEON(MVT::v8i8);
261  addDRTypeForNEON(MVT::v4i16);
262  addDRTypeForNEON(MVT::v2i32);
263  addDRTypeForNEON(MVT::v1i64);
264  addDRTypeForNEON(MVT::v1f64);
265  addDRTypeForNEON(MVT::v4f16);
266  if (Subtarget->hasBF16())
267  addDRTypeForNEON(MVT::v4bf16);
268 
269  addQRTypeForNEON(MVT::v4f32);
270  addQRTypeForNEON(MVT::v2f64);
271  addQRTypeForNEON(MVT::v16i8);
272  addQRTypeForNEON(MVT::v8i16);
273  addQRTypeForNEON(MVT::v4i32);
274  addQRTypeForNEON(MVT::v2i64);
275  addQRTypeForNEON(MVT::v8f16);
276  if (Subtarget->hasBF16())
277  addQRTypeForNEON(MVT::v8bf16);
278  }
279 
280  if (Subtarget->hasSVE()) {
281  // Add legal sve predicate types
282  addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
283  addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
284  addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
285  addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
286 
287  // Add legal sve data types
288  addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
289  addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
290  addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
291  addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
292 
293  addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
294  addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
295  addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
296  addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
297  addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
298  addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
299 
300  if (Subtarget->hasBF16()) {
301  addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
302  addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
303  addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
304  }
305 
306  if (Subtarget->useSVEForFixedLengthVectors()) {
307  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
308  if (useSVEForFixedLengthVectorVT(VT))
309  addRegisterClass(VT, &AArch64::ZPRRegClass);
310 
311  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
312  if (useSVEForFixedLengthVectorVT(VT))
313  addRegisterClass(VT, &AArch64::ZPRRegClass);
314  }
315 
316  for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325  }
326 
327  for (auto VT :
331 
332  for (auto VT :
334  MVT::nxv2f64 }) {
346  }
347  }
348 
349  // Compute derived properties from the register classes
350  computeRegisterProperties(Subtarget->getRegisterInfo());
351 
352  // Provide all sorts of operation actions
386 
390 
394 
396 
397  // Custom lowering hooks are needed for XOR
398  // to fold it into CSINC/CSINV.
401 
402  // Virtually no operation on f128 is legal, but LLVM can't expand them when
403  // there's a valid register class, so we need custom operations in most cases.
427 
428  // Lowering for many of the conversions is actually specified by the non-f128
429  // type. The LowerXXX function will be trivial when f128 isn't involved.
460 
461  // Variable arguments.
466 
467  // Variable-sized objects.
470 
471  if (Subtarget->isTargetWindows())
472  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
473  else
474  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
475 
476  // Constant pool entries
478 
479  // BlockAddress
481 
482  // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
491 
492  // AArch64 lacks both left-rotate and popcount instructions.
495  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
498  }
499 
500  // AArch64 doesn't have i32 MULH{S|U}.
503 
504  // AArch64 doesn't have {U|S}MUL_LOHI.
507 
511 
514 
517  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
520  }
527 
528  // Custom lower Add/Sub/Mul with overflow.
541 
550  if (Subtarget->hasFullFP16())
552  else
554 
588 
589  if (!Subtarget->hasFullFP16()) {
612 
613  // promote v4f16 to v4f32 when that is known to be safe.
622 
638 
659  }
660 
661  // AArch64 has implementations of a lot of rounding-like FP operations.
662  for (MVT Ty : {MVT::f32, MVT::f64}) {
677  }
678 
679  if (Subtarget->hasFullFP16()) {
690  }
691 
693 
695 
701 
702  // Generate outline atomics library calls only if LSE was not specified for
703  // the subtarget.
704  if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
730 #define LCALLNAMES(A, B, N) \
731  setLibcallName(A##N##_RELAX, #B #N "_relax"); \
732  setLibcallName(A##N##_ACQ, #B #N "_acq"); \
733  setLibcallName(A##N##_REL, #B #N "_rel"); \
734  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
735 #define LCALLNAME4(A, B) \
736  LCALLNAMES(A, B, 1) \
737  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
738 #define LCALLNAME5(A, B) \
739  LCALLNAMES(A, B, 1) \
740  LCALLNAMES(A, B, 2) \
741  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
742  LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
743  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
744  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
745  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
746  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
747  LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
748 #undef LCALLNAMES
749 #undef LCALLNAME4
750 #undef LCALLNAME5
751  }
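 // Editor's note: illustrative sketch, not part of the LLVM source. It shows
 // how the LCALLNAMES-style token pasting above builds the outline-atomic
 // symbol names; the EXPAND_NAME macro and the variable below are hypothetical
 // helpers used only for demonstration.
 #define EXPAND_NAME(B, N, S) #B #N S
 // Mirrors setLibcallName(A##4##_ACQ_REL, #B #4 "_acq_rel") in LCALLNAMES:
 static constexpr const char *ExampleOutlineCasName =
     EXPAND_NAME(__aarch64_cas, 4, "_acq_rel"); // "__aarch64_cas4_acq_rel"
 #undef EXPAND_NAME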
752 
753  // 128-bit loads and stores can be done without expanding
756 
757  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
758  // custom lowering, as there are no un-paired non-temporal stores and
759  // legalization will break up 256 bit inputs.
767 
768  // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
769  // This requires the Performance Monitors extension.
770  if (Subtarget->hasPerfMon())
771  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
772 
773  if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
774  getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
775  // Issue __sincos_stret if available.
778  } else {
781  }
782 
783  if (Subtarget->getTargetTriple().isOSMSVCRT()) {
784  // MSVCRT doesn't have powi; fall back to pow
785  setLibcallName(RTLIB::POWI_F32, nullptr);
786  setLibcallName(RTLIB::POWI_F64, nullptr);
787  }
788 
789  // Make floating-point constants legal for the large code model, so they don't
790  // become loads from the constant pool.
791  if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
792  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
793  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
794  }
795 
796  // AArch64 does not have floating-point extending loads, i1 sign-extending
797  // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
798  for (MVT VT : MVT::fp_valuetypes()) {
803  }
804  for (MVT VT : MVT::integer_valuetypes())
806 
814 
818 
819  // Indexed loads and stores are supported.
820  for (unsigned im = (unsigned)ISD::PRE_INC;
838  }
839 
840  // Trap.
844 
845  // We combine OR nodes for bitfield operations.
847  // Try to create BICs for vector ANDs.
849 
850  // Vector add and sub nodes may conceal a high-half opportunity.
851  // Also, try to fold ADD into CSINC/CSINV..
859 
863 
865 
873  if (Subtarget->supportsAddressTopByteIgnored())
875 
878 
880 
883 
889 
891 
892  // In case of strict alignment, avoid an excessive number of byte wide stores.
896 
901 
903 
907 
909 
911 
912  EnableExtLdPromotion = true;
913 
914  // Set required alignment.
916  // Set preferred alignments.
919 
920  // Only change the limit for entries in a jump table if specified by
921  // the subtarget, but not at the command line.
922  unsigned MaxJT = STI.getMaximumJumpTableSize();
923  if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
924  setMaximumJumpTableSize(MaxJT);
925 
926  setHasExtractBitsInsn(true);
927 
929 
930  if (Subtarget->hasNEON()) {
931  // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
932  // silliness like this:
958 
964 
966 
967  // AArch64 doesn't have direct vector->f32 conversion instructions for
968  // elements smaller than i32, so promote the input to i32 first.
971  // i8 vector elements also need promotion to i32 for v8i8
974  // Similarly, there is no direct i32 -> f64 vector conversion instruction.
979  // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
980  // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
983 
984  if (Subtarget->hasFullFP16()) {
989  } else {
990  // when AArch64 doesn't have fullfp16 support, promote the input
991  // to i32 first.
996  }
997 
1000 
1001  // AArch64 doesn't have MUL.2d:
1003  // Custom handling for some quad-vector types to detect MULL.
1007 
1008  // Saturates
1009  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1015  }
1016 
1017  // Vector reductions
1018  for (MVT VT : { MVT::v4f16, MVT::v2f32,
1020  if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1023 
1025  }
1026  }
1027  for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1034  }
1036 
1039  // Likewise, narrowing and extending vector loads/stores aren't handled
1040  // directly.
1041  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1043 
1044  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1047  } else {
1050  }
1053 
1056 
1057  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1058  setTruncStoreAction(VT, InnerVT, Expand);
1059  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1060  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1061  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1062  }
1063  }
1064 
1065  // AArch64 has implementations of a lot of rounding-like FP operations.
1066  for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1073  }
1074 
1075  if (Subtarget->hasFullFP16()) {
1076  for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1083  }
1084  }
1085 
1086  if (Subtarget->hasSVE())
1088 
1090  }
1091 
1092  if (Subtarget->hasSVE()) {
1093  // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
1094  // splat of 0 or undef) once vector selects supported in SVE codegen. See
1095  // D68877 for more details.
1096  for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1130  }
1131 
1132  // Illegal unpacked integer vector types.
1133  for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1136  }
1137 
1138  for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1147 
1148  // There are no legal MVT::nxv16f## based types.
1149  if (VT != MVT::nxv16i1) {
1152  }
1153  }
1154 
1186  }
1187 
1188  for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1192  }
1193 
1195 
1198 
1199  // NOTE: Currently this has to happen after computeRegisterProperties rather
1200  // than the preferred option of combining it with the addRegisterClass call.
1201  if (Subtarget->useSVEForFixedLengthVectors()) {
1202  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1203  if (useSVEForFixedLengthVectorVT(VT))
1204  addTypeForFixedLengthSVE(VT);
1205  for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1206  if (useSVEForFixedLengthVectorVT(VT))
1207  addTypeForFixedLengthSVE(VT);
1208 
1209  // 64-bit results can mean an input wider than NEON can handle.
1210  for (auto VT : {MVT::v8i8, MVT::v4i16})
1213 
1214  // 128-bit results imply an input wider than NEON can handle.
1215  for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1217  for (auto VT : {MVT::v8f16, MVT::v4f32})
1219 
1220  // These operations are not supported on NEON but SVE can do them.
1255 
1256  // Int operations with no NEON support.
1257  for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1264  }
1265 
1266  // FP operations with no NEON support.
1267  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1270 
1271  // Use SVE for vectors with more than 2 elements.
1272  for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1274  }
1275  }
1276 
1278 }
1279 
1280 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
1281  assert(VT.isVector() && "VT should be a vector type");
1282 
1283  if (VT.isFloatingPoint()) {
1285  setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1286  setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1287  }
1288 
1289  // Mark vector float intrinsics as expand.
1290  if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1299 
1300  // But we do support custom-lowering for FCOPYSIGN.
1302  }
1303 
1315 
1319  for (MVT InnerVT : MVT::all_valuetypes())
1320  setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1321 
1322  // CNT supports only B element sizes, then use UADDLP to widen.
1323  if (VT != MVT::v8i8 && VT != MVT::v16i8)
1325 
1331 
1334 
1335  if (!VT.isFloatingPoint())
1337 
1338  // [SU][MIN|MAX] are available for all NEON types apart from i64.
1339  if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1340  for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1341  setOperationAction(Opcode, VT, Legal);
1342 
1343  // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1344  if (VT.isFloatingPoint() &&
1345  VT.getVectorElementType() != MVT::bf16 &&
1346  (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1347  for (unsigned Opcode :
1349  setOperationAction(Opcode, VT, Legal);
1350 
1351  if (Subtarget->isLittleEndian()) {
1352  for (unsigned im = (unsigned)ISD::PRE_INC;
1356  }
1357  }
1358 }
1359 
1360 void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1361  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1362 
1363  // By default everything must be expanded.
1364  for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1366 
1367  // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1369 
1370  // Lower fixed length vector operations to scalable equivalents.
1429 }
1430 
1431 void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1432  addRegisterClass(VT, &AArch64::FPR64RegClass);
1433  addTypeForNEON(VT, MVT::v2i32);
1434 }
1435 
1436 void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1437  addRegisterClass(VT, &AArch64::FPR128RegClass);
1438  addTypeForNEON(VT, MVT::v4i32);
1439 }
1440 
1441 EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1442  LLVMContext &C, EVT VT) const {
1443  if (!VT.isVector())
1444  return MVT::i32;
1445  if (VT.isScalableVector())
1446  return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1447  return VT.changeVectorElementTypeToInteger();
1448 }
1449 
1450 static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1451  const APInt &Demanded,
1452  TargetLowering::TargetLoweringOpt &TLO,
1453  unsigned NewOpc) {
1454  uint64_t OldImm = Imm, NewImm, Enc;
1455  uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1456 
1457  // Return if the immediate is already all zeros, all ones, a bimm32 or a
1458  // bimm64.
1459  if (Imm == 0 || Imm == Mask ||
1460  AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1461  return false;
1462 
1463  unsigned EltSize = Size;
1464  uint64_t DemandedBits = Demanded.getZExtValue();
1465 
1466  // Clear bits that are not demanded.
1467  Imm &= DemandedBits;
1468 
1469  while (true) {
1470  // The goal here is to set the non-demanded bits in a way that minimizes
1471  // the number of switching between 0 and 1. In order to achieve this goal,
1472  // we set the non-demanded bits to the value of the preceding demanded bits.
1473  // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1474  // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1475  // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1476  // The final result is 0b11000011.
1477  uint64_t NonDemandedBits = ~DemandedBits;
1478  uint64_t InvertedImm = ~Imm & DemandedBits;
1479  uint64_t RotatedImm =
1480  ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1481  NonDemandedBits;
1482  uint64_t Sum = RotatedImm + NonDemandedBits;
1483  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1484  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1485  NewImm = (Imm | Ones) & Mask;
1486 
1487  // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1488  // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1489  // we halve the element size and continue the search.
1490  if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1491  break;
1492 
1493  // We cannot shrink the element size any further if it is 2-bits.
1494  if (EltSize == 2)
1495  return false;
1496 
1497  EltSize /= 2;
1498  Mask >>= EltSize;
1499  uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1500 
1501  // Return if there is mismatch in any of the demanded bits of Imm and Hi.
1502  if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1503  return false;
1504 
1505  // Merge the upper and lower halves of Imm and DemandedBits.
1506  Imm |= Hi;
1507  DemandedBits |= DemandedBitsHi;
1508  }
1509 
1510  ++NumOptimizedImms;
1511 
1512  // Replicate the element across the register width.
1513  while (EltSize < Size) {
1514  NewImm |= NewImm << EltSize;
1515  EltSize *= 2;
1516  }
1517 
1518  (void)OldImm;
1519  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1520  "demanded bits should never be altered");
1521  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1522 
1523  // Create the new constant immediate node.
1524  EVT VT = Op.getValueType();
1525  SDLoc DL(Op);
1526  SDValue New;
1527 
1528  // If the new constant immediate is all-zeros or all-ones, let the target
1529  // independent DAG combine optimize this node.
1530  if (NewImm == 0 || NewImm == OrigMask) {
1531  New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1532  TLO.DAG.getConstant(NewImm, DL, VT));
1533  // Otherwise, create a machine node so that target independent DAG combine
1534  // doesn't undo this optimization.
1535  } else {
1536  Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1537  SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1538  New = SDValue(
1539  TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1540  }
1541 
1542  return TLO.CombineTo(Op, New);
1543 }
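// Editor's note: illustrative sketch, not part of the LLVM source. It restates
// the "copy the preceding demanded bit into the non-demanded bits" step of
// optimizeLogicalImm above for a single 8-bit element, so the worked example
// from the comment (0bx10xx0x1 -> 0b11000011) can be checked in isolation.
// The helper name fillNonDemandedBits8 is hypothetical; it relies on <cstdint>
// (already included above).
static inline uint8_t fillNonDemandedBits8(uint8_t Imm, uint8_t DemandedMask) {
  uint8_t NonDemandedBits = ~DemandedMask;
  // Demanded bits whose value is 0.
  uint8_t InvertedImm = ~Imm & DemandedMask;
  // Rotate left by one and keep only non-demanded positions: these seed the
  // propagation that copies each preceding demanded bit upwards.
  uint8_t RotatedImm =
      ((InvertedImm << 1) | ((InvertedImm >> 7) & 1)) & NonDemandedBits;
  uint8_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & 0x80;
  uint8_t Ones = (Sum + Carry) & NonDemandedBits;
  return Imm | Ones;
}
// With demanded mask 0b01100101 and demanded value 0b01000001 (the comment's
// 0bx10xx0x1), fillNonDemandedBits8 returns 0b11000011, matching the example.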
1544 
1545 bool AArch64TargetLowering::targetShrinkDemandedConstant(
1546  SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1547  TargetLoweringOpt &TLO) const {
1548  // Delay this optimization to as late as possible.
1549  if (!TLO.LegalOps)
1550  return false;
1551 
1552  if (!EnableOptimizeLogicalImm)
1553  return false;
1554 
1555  EVT VT = Op.getValueType();
1556  if (VT.isVector())
1557  return false;
1558 
1559  unsigned Size = VT.getSizeInBits();
1560  assert((Size == 32 || Size == 64) &&
1561  "i32 or i64 is expected after legalization.");
1562 
1563  // Exit early if we demand all bits.
1564  if (DemandedBits.countPopulation() == Size)
1565  return false;
1566 
1567  unsigned NewOpc;
1568  switch (Op.getOpcode()) {
1569  default:
1570  return false;
1571  case ISD::AND:
1572  NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1573  break;
1574  case ISD::OR:
1575  NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1576  break;
1577  case ISD::XOR:
1578  NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1579  break;
1580  }
1581  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1582  if (!C)
1583  return false;
1584  uint64_t Imm = C->getZExtValue();
1585  return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1586 }
1587 
1588 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
1589 /// Mask are known to be either zero or one and return them Known.
1590 void AArch64TargetLowering::computeKnownBitsForTargetNode(
1591  const SDValue Op, KnownBits &Known,
1592  const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1593  switch (Op.getOpcode()) {
1594  default:
1595  break;
1596  case AArch64ISD::CSEL: {
1597  KnownBits Known2;
1598  Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1599  Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1600  Known = KnownBits::commonBits(Known, Known2);
1601  break;
1602  }
1603  case AArch64ISD::LOADgot:
1604  case AArch64ISD::ADDlow: {
1605  if (!Subtarget->isTargetILP32())
1606  break;
1607  // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1608  Known.Zero = APInt::getHighBitsSet(64, 32);
1609  break;
1610  }
1611  case ISD::INTRINSIC_W_CHAIN: {
1612  ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1613  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1614  switch (IntID) {
1615  default: return;
1616  case Intrinsic::aarch64_ldaxr:
1617  case Intrinsic::aarch64_ldxr: {
1618  unsigned BitWidth = Known.getBitWidth();
1619  EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1620  unsigned MemBits = VT.getScalarSizeInBits();
1621  Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1622  return;
1623  }
1624  }
1625  break;
1626  }
1627  case ISD::INTRINSIC_WO_CHAIN:
1628  case ISD::INTRINSIC_VOID: {
1629  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1630  switch (IntNo) {
1631  default:
1632  break;
1633  case Intrinsic::aarch64_neon_umaxv:
1634  case Intrinsic::aarch64_neon_uminv: {
1635  // Figure out the datatype of the vector operand. The UMINV instruction
1636  // will zero extend the result, so we can mark as known zero all the
1637  // bits larger than the element datatype. 32-bit or larger doesn't need
1638  // this as those are legal types and will be handled by isel directly.
1639  MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1640  unsigned BitWidth = Known.getBitWidth();
1641  if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1642  assert(BitWidth >= 8 && "Unexpected width!");
1643  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1644  Known.Zero |= Mask;
1645  } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1646  assert(BitWidth >= 16 && "Unexpected width!");
1647  APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1648  Known.Zero |= Mask;
1649  }
1650  break;
1651  } break;
1652  }
1653  }
1654  }
1655 }
1656 
1657 MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1658  EVT) const {
1659  return MVT::i64;
1660 }
1661 
1662 bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1663  EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1664  bool *Fast) const {
1665  if (Subtarget->requiresStrictAlign())
1666  return false;
1667 
1668  if (Fast) {
1669  // Some CPUs are fine with unaligned stores except for 128-bit ones.
1670  *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1671  // See comments in performSTORECombine() for more details about
1672  // these conditions.
1673 
1674  // Code that uses clang vector extensions can mark that it
1675  // wants unaligned accesses to be treated as fast by
1676  // underspecifying alignment to be 1 or 2.
1677  Align <= 2 ||
1678 
1679  // Disregard v2i64. Memcpy lowering produces those and splitting
1680  // them regresses performance on micro-benchmarks and olden/bh.
1681  VT == MVT::v2i64;
1682  }
1683  return true;
1684 }
1685 
1686 // Same as above but handling LLTs instead.
1687 bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1688  LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1689  bool *Fast) const {
1690  if (Subtarget->requiresStrictAlign())
1691  return false;
1692 
1693  if (Fast) {
1694  // Some CPUs are fine with unaligned stores except for 128-bit ones.
1695  *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1696  Ty.getSizeInBytes() != 16 ||
1697  // See comments in performSTORECombine() for more details about
1698  // these conditions.
1699 
1700  // Code that uses clang vector extensions can mark that it
1701  // wants unaligned accesses to be treated as fast by
1702  // underspecifying alignment to be 1 or 2.
1703  Alignment <= 2 ||
1704 
1705  // Disregard v2i64. Memcpy lowering produces those and splitting
1706  // them regresses performance on micro-benchmarks and olden/bh.
1707  Ty == LLT::vector(2, 64);
1708  }
1709  return true;
1710 }
1711 
1712 FastISel *
1713 AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1714  const TargetLibraryInfo *libInfo) const {
1715  return AArch64::createFastISel(funcInfo, libInfo);
1716 }
1717 
1718 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1719 #define MAKE_CASE(V) \
1720  case V: \
1721  return #V;
1722  switch ((AArch64ISD::NodeType)Opcode) {
1723  case AArch64ISD::FIRST_NUMBER:
1724  break;
1995  }
1996 #undef MAKE_CASE
1997  return nullptr;
1998 }
1999 
2000 MachineBasicBlock *
2001 AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2002  MachineBasicBlock *MBB) const {
2003  // We materialise the F128CSEL pseudo-instruction as some control flow and a
2004  // phi node:
2005 
2006  // OrigBB:
2007  // [... previous instrs leading to comparison ...]
2008  // b.ne TrueBB
2009  // b EndBB
2010  // TrueBB:
2011  // ; Fallthrough
2012  // EndBB:
2013  // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2014 
2015  MachineFunction *MF = MBB->getParent();
2016  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2017  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2018  DebugLoc DL = MI.getDebugLoc();
2019  MachineFunction::iterator It = ++MBB->getIterator();
2020 
2021  Register DestReg = MI.getOperand(0).getReg();
2022  Register IfTrueReg = MI.getOperand(1).getReg();
2023  Register IfFalseReg = MI.getOperand(2).getReg();
2024  unsigned CondCode = MI.getOperand(3).getImm();
2025  bool NZCVKilled = MI.getOperand(4).isKill();
2026 
2027  MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2028  MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2029  MF->insert(It, TrueBB);
2030  MF->insert(It, EndBB);
2031 
2032  // Transfer rest of current basic-block to EndBB
2033  EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2034  MBB->end());
2035  EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2036 
2037  BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2038  BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2039  MBB->addSuccessor(TrueBB);
2040  MBB->addSuccessor(EndBB);
2041 
2042  // TrueBB falls through to the end.
2043  TrueBB->addSuccessor(EndBB);
2044 
2045  if (!NZCVKilled) {
2046  TrueBB->addLiveIn(AArch64::NZCV);
2047  EndBB->addLiveIn(AArch64::NZCV);
2048  }
2049 
2050  BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2051  .addReg(IfTrueReg)
2052  .addMBB(TrueBB)
2053  .addReg(IfFalseReg)
2054  .addMBB(MBB);
2055 
2056  MI.eraseFromParent();
2057  return EndBB;
2058 }
2059 
2060 MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2061  MachineInstr &MI, MachineBasicBlock *BB) const {
2062  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2063  BB->getParent()->getFunction().getPersonalityFn())) &&
2064  "SEH does not use catchret!");
2065  return BB;
2066 }
2067 
2068 MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2069  MachineInstr &MI, MachineBasicBlock *BB) const {
2070  switch (MI.getOpcode()) {
2071  default:
2072 #ifndef NDEBUG
2073  MI.dump();
2074 #endif
2075  llvm_unreachable("Unexpected instruction for custom inserter!");
2076 
2077  case AArch64::F128CSEL:
2078  return EmitF128CSEL(MI, BB);
2079 
2080  case TargetOpcode::STACKMAP:
2081  case TargetOpcode::PATCHPOINT:
2082  case TargetOpcode::STATEPOINT:
2083  return emitPatchPoint(MI, BB);
2084 
2085  case AArch64::CATCHRET:
2086  return EmitLoweredCatchRet(MI, BB);
2087  }
2088 }
2089 
2090 //===----------------------------------------------------------------------===//
2091 // AArch64 Lowering private implementation.
2092 //===----------------------------------------------------------------------===//
2093 
2094 //===----------------------------------------------------------------------===//
2095 // Lowering Code
2096 //===----------------------------------------------------------------------===//
2097 
2098 /// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2099 /// CC
2100 static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2101  switch (CC) {
2102  default:
2103  llvm_unreachable("Unknown condition code!");
2104  case ISD::SETNE:
2105  return AArch64CC::NE;
2106  case ISD::SETEQ:
2107  return AArch64CC::EQ;
2108  case ISD::SETGT:
2109  return AArch64CC::GT;
2110  case ISD::SETGE:
2111  return AArch64CC::GE;
2112  case ISD::SETLT:
2113  return AArch64CC::LT;
2114  case ISD::SETLE:
2115  return AArch64CC::LE;
2116  case ISD::SETUGT:
2117  return AArch64CC::HI;
2118  case ISD::SETUGE:
2119  return AArch64CC::HS;
2120  case ISD::SETULT:
2121  return AArch64CC::LO;
2122  case ISD::SETULE:
2123  return AArch64CC::LS;
2124  }
2125 }
2126 
2127 /// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2128 static void changeFPCCToAArch64CC(ISD::CondCode CC,
2129  AArch64CC::CondCode &CondCode,
2130  AArch64CC::CondCode &CondCode2) {
2131  CondCode2 = AArch64CC::AL;
2132  switch (CC) {
2133  default:
2134  llvm_unreachable("Unknown FP condition!");
2135  case ISD::SETEQ:
2136  case ISD::SETOEQ:
2138  break;
2139  case ISD::SETGT:
2140  case ISD::SETOGT:
2142  break;
2143  case ISD::SETGE:
2144  case ISD::SETOGE:
2146  break;
2147  case ISD::SETOLT:
2149  break;
2150  case ISD::SETOLE:
2152  break;
2153  case ISD::SETONE:
2155  CondCode2 = AArch64CC::GT;
2156  break;
2157  case ISD::SETO:
2159  break;
2160  case ISD::SETUO:
2162  break;
2163  case ISD::SETUEQ:
2165  CondCode2 = AArch64CC::VS;
2166  break;
2167  case ISD::SETUGT:
2169  break;
2170  case ISD::SETUGE:
2172  break;
2173  case ISD::SETLT:
2174  case ISD::SETULT:
2176  break;
2177  case ISD::SETLE:
2178  case ISD::SETULE:
2180  break;
2181  case ISD::SETNE:
2182  case ISD::SETUNE:
2184  break;
2185  }
2186 }
2187 
2188 /// Convert a DAG fp condition code to an AArch64 CC.
2189 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2190 /// should be AND'ed instead of OR'ed.
2193  AArch64CC::CondCode &CondCode2) {
2194  CondCode2 = AArch64CC::AL;
2195  switch (CC) {
2196  default:
2197  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2198  assert(CondCode2 == AArch64CC::AL);
2199  break;
2200  case ISD::SETONE:
2201  // (a one b)
2202  // == ((a olt b) || (a ogt b))
2203  // == ((a ord b) && (a une b))
2205  CondCode2 = AArch64CC::NE;
2206  break;
2207  case ISD::SETUEQ:
2208  // (a ueq b)
2209  // == ((a uno b) || (a oeq b))
2210  // == ((a ule b) && (a uge b))
2212  CondCode2 = AArch64CC::LE;
2213  break;
2214  }
2215 }
2216 
2217 /// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2218 /// CC usable with the vector instructions. Fewer operations are available
2219 /// without a real NZCV register, so we have to use less efficient combinations
2220 /// to get the same effect.
2223  AArch64CC::CondCode &CondCode2,
2224  bool &Invert) {
2225  Invert = false;
2226  switch (CC) {
2227  default:
2228  // Mostly the scalar mappings work fine.
2229  changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2230  break;
2231  case ISD::SETUO:
2232  Invert = true;
2234  case ISD::SETO:
2236  CondCode2 = AArch64CC::GE;
2237  break;
2238  case ISD::SETUEQ:
2239  case ISD::SETULT:
2240  case ISD::SETULE:
2241  case ISD::SETUGT:
2242  case ISD::SETUGE:
2243  // All of the compare-mask comparisons are ordered, but we can switch
2244  // between the two by a double inversion. E.g. ULE == !OGT.
2245  Invert = true;
2246  changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2247  CondCode, CondCode2);
2248  break;
2249  }
2250 }
2251 
2252 static bool isLegalArithImmed(uint64_t C) {
2253  // Matches AArch64DAGToDAGISel::SelectArithImmed().
2254  bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2255  LLVM_DEBUG(dbgs() << "Is imm " << C
2256  << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2257  return IsLegal;
2258 }
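// Editor's note: illustrative sketch, not part of the LLVM source. It restates
// the rule checked by isLegalArithImmed above (a 12-bit value, optionally
// shifted left by 12) as a constexpr copy so a few sample values can be
// checked at compile time; the *_Sketch name is hypothetical.
static constexpr bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}
static_assert(isLegalArithImmedSketch(4095), "fits in the low 12 bits");
static_assert(isLegalArithImmedSketch(0xABC000), "12-bit value shifted left by 12");
static_assert(!isLegalArithImmedSketch(4097), "needs bits in both halves");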
2259 
2260 // Can a (CMP op1, (sub 0, op2) be turned into a CMN instruction on
2261 // the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
2262 // can be set differently by this operation. It comes down to whether
2263 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
2264 // everything is fine. If not then the optimization is wrong. Thus general
2265 // comparisons are only valid if op2 != 0.
2266 //
2267 // So, finally, the only LLVM-native comparisons that don't mention C and V
2268 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2269 // the absence of information about op2.
2270 static bool isCMN(SDValue Op, ISD::CondCode CC) {
2271  return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2272  (CC == ISD::SETEQ || CC == ISD::SETNE);
2273 }
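// Editor's note: illustrative sketch, not part of the LLVM source. This is the
// kind of source-level pattern the isCMN check above targets: an equality
// compare against a negated value, i.e. (CMP op1, (sub 0, op2)). For SETEQ /
// SETNE this can typically be emitted as a CMN (compare-negative), roughly
//   cmn  w0, w1
//   cset w0, eq
// rather than materialising the negation first (exact registers depend on the
// surrounding code). The function name is hypothetical.
static bool equalsNegation(int a, int b) { return a == -b; }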
2274 
2275 static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2276  SelectionDAG &DAG, SDValue Chain,
2277  bool IsSignaling) {
2278  EVT VT = LHS.getValueType();
2279  assert(VT != MVT::f128);
2280  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2281  unsigned Opcode =
2282  IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2283  return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2284 }
2285 
2286 static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2287  const SDLoc &dl, SelectionDAG &DAG) {
2288  EVT VT = LHS.getValueType();
2289  const bool FullFP16 =
2290  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2291 
2292  if (VT.isFloatingPoint()) {
2293  assert(VT != MVT::f128);
2294  if (VT == MVT::f16 && !FullFP16) {
2295  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2296  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2297  VT = MVT::f32;
2298  }
2299  return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2300  }
2301 
2302  // The CMP instruction is just an alias for SUBS, and representing it as
2303  // SUBS means that it's possible to get CSE with subtract operations.
2304  // A later phase can perform the optimization of setting the destination
2305  // register to WZR/XZR if it ends up being unused.
2306  unsigned Opcode = AArch64ISD::SUBS;
2307 
2308  if (isCMN(RHS, CC)) {
2309  // Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
2310  Opcode = AArch64ISD::ADDS;
2311  RHS = RHS.getOperand(1);
2312  } else if (isCMN(LHS, CC)) {
2313  // As we are looking for EQ/NE compares, the operands can be commuted ; can
2314  // we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
2315  Opcode = AArch64ISD::ADDS;
2316  LHS = LHS.getOperand(1);
2317  } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2318  if (LHS.getOpcode() == ISD::AND) {
2319  // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2320  // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2321  // of the signed comparisons.
2322  const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2323  DAG.getVTList(VT, MVT_CC),
2324  LHS.getOperand(0),
2325  LHS.getOperand(1));
2326  // Replace all users of (and X, Y) with newly generated (ands X, Y)
2327  DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2328  return ANDSNode.getValue(1);
2329  } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2330  // Use result of ANDS
2331  return LHS.getValue(1);
2332  }
2333  }
2334 
2335  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2336  .getValue(1);
2337 }
2338 
2339 /// \defgroup AArch64CCMP CMP;CCMP matching
2340 ///
2341 /// These functions deal with the formation of CMP;CCMP;... sequences.
2342 /// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2343 /// a comparison. They set the NZCV flags to a predefined value if their
2344 /// predicate is false. This allows us to express arbitrary conjunctions, for
2345 /// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2346 /// expressed as:
2347 /// cmp A
2348 /// ccmp B, inv(CB), CA
2349 /// check for CB flags
2350 ///
2351 /// This naturally lets us implement chains of AND operations with SETCC
2352 /// operands. And we can even implement some other situations by transforming
2353 /// them:
2354 /// - We can implement (NEG SETCC) i.e. negating a single comparison by
2355 /// negating the flags used in a CCMP/FCCMP operations.
2356 /// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2357 /// by negating the flags we test for afterwards. i.e.
2358 /// NEG (CMP CCMP CCCMP ...) can be implemented.
2359 /// - Note that we can only ever negate all previously processed results.
2360 /// What we can not implement by flipping the flags to test is a negation
2361 /// of two sub-trees (because the negation affects all sub-trees emitted so
2362 /// far, so the 2nd sub-tree we emit would also affect the first).
2363 /// With those tools we can implement some OR operations:
2364 /// - (OR (SETCC A) (SETCC B)) can be implemented via:
2365 /// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2366 /// - After transforming OR to NEG/AND combinations we may be able to use NEG
2367 /// elimination rules from earlier to implement the whole thing as a
2368 /// CCMP/FCCMP chain.
2369 ///
2370 /// As complete example:
2371 /// or (or (setCA (cmp A)) (setCB (cmp B)))
2372 /// (and (setCC (cmp C)) (setCD (cmp D)))"
2373 /// can be reassociated to:
2374 /// or (and (setCC (cmp C)) (setCD (cmp D)))
2375 ///    (or (setCA (cmp A)) (setCB (cmp B)))
2376 /// can be transformed to:
2377 /// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2378 /// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2379 /// which can be implemented as:
2380 /// cmp C
2381 /// ccmp D, inv(CD), CC
2382 /// ccmp A, CA, inv(CD)
2383 /// ccmp B, CB, inv(CA)
2384 /// check for CB flags
2385 ///
2386 /// A counterexample is "or (and A B) (and C D)" which translates to
2387 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
2388 /// can only implement 1 of the inner (not) operations, but not both!
2389 /// @{
2390 
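// Editor's note: illustrative sketch, not part of the LLVM source. A source-
// level conjunction of the shape handled by the CCMP matching described above;
// the function name is hypothetical.
static bool inRange(int a, int b) { return a > 3 && b < 7; }
// With this lowering the second compare becomes a conditional compare instead
// of a separate CSET/AND, producing roughly:
//   cmp   w0, #3
//   ccmp  w1, #7, #0, gt   // flags forced to #0 (so 'lt' fails) when a <= 3
//   cset  w0, lt
// (exact registers, immediates and the NZCV constant depend on the code).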
2391 /// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2392 static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2393  ISD::CondCode CC, SDValue CCOp,
2394  AArch64CC::CondCode Predicate,
2395  AArch64CC::CondCode OutCC,
2396  const SDLoc &DL, SelectionDAG &DAG) {
2397  unsigned Opcode = 0;
2398  const bool FullFP16 =
2399  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2400 
2401  if (LHS.getValueType().isFloatingPoint()) {
2402  assert(LHS.getValueType() != MVT::f128);
2403  if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2404  LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2405  RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2406  }
2407  Opcode = AArch64ISD::FCCMP;
2408  } else if (RHS.getOpcode() == ISD::SUB) {
2409  SDValue SubOp0 = RHS.getOperand(0);
2410  if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2411  // See emitComparison() on why we can only do this for SETEQ and SETNE.
2412  Opcode = AArch64ISD::CCMN;
2413  RHS = RHS.getOperand(1);
2414  }
2415  }
2416  if (Opcode == 0)
2417  Opcode = AArch64ISD::CCMP;
2418 
2419  SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2420  AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2421  unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2422  SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2423  return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2424 }
2425 
2426 /// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2427 /// expressed as a conjunction. See \ref AArch64CCMP.
2428 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
2429 /// changing the conditions on the SETCC tests.
2430 /// (this means we can call emitConjunctionRec() with
2431 /// Negate==true on this sub-tree)
2432 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
2433 /// cannot do the negation naturally. We are required to
2434 /// emit the subtree first in this case.
2435 /// \param WillNegate Is true if we are called when the result of this
2436 /// subexpression must be negated. This happens when the
2437 /// outer expression is an OR. We can use this fact to know
2438 /// that we have a double negation (or (or ...) ...) that
2439 /// can be implemented for free.
2440 static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2441  bool &MustBeFirst, bool WillNegate,
2442  unsigned Depth = 0) {
2443  if (!Val.hasOneUse())
2444  return false;
2445  unsigned Opcode = Val->getOpcode();
2446  if (Opcode == ISD::SETCC) {
2447  if (Val->getOperand(0).getValueType() == MVT::f128)
2448  return false;
2449  CanNegate = true;
2450  MustBeFirst = false;
2451  return true;
2452  }
2453  // Protect against exponential runtime and stack overflow.
2454  if (Depth > 6)
2455  return false;
2456  if (Opcode == ISD::AND || Opcode == ISD::OR) {
2457  bool IsOR = Opcode == ISD::OR;
2458  SDValue O0 = Val->getOperand(0);
2459  SDValue O1 = Val->getOperand(1);
2460  bool CanNegateL;
2461  bool MustBeFirstL;
2462  if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2463  return false;
2464  bool CanNegateR;
2465  bool MustBeFirstR;
2466  if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2467  return false;
2468 
2469  if (MustBeFirstL && MustBeFirstR)
2470  return false;
2471 
2472  if (IsOR) {
2473  // For an OR expression we need to be able to naturally negate at least
2474  // one side or we cannot do the transformation at all.
2475  if (!CanNegateL && !CanNegateR)
2476  return false;
2477  // If the result of the OR will be negated and we can naturally negate
2478  // the leaves, then this sub-tree as a whole negates naturally.
2479  CanNegate = WillNegate && CanNegateL && CanNegateR;
2480  // If we cannot naturally negate the whole sub-tree, then this must be
2481  // emitted first.
2482  MustBeFirst = !CanNegate;
2483  } else {
2484  assert(Opcode == ISD::AND && "Must be OR or AND");
2485  // We cannot naturally negate an AND operation.
2486  CanNegate = false;
2487  MustBeFirst = MustBeFirstL || MustBeFirstR;
2488  }
2489  return true;
2490  }
2491  return false;
2492 }
2493 
2494 /// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
2495 /// of CCMP/CFCMP ops. See @ref AArch64CCMP.
2496 /// Tries to transform the given i1 producing node @p Val to a series compare
2497 /// and conditional compare operations. @returns an NZCV flags producing node
2498 /// and sets @p OutCC to the flags that should be tested or returns SDValue() if
2499 /// transformation was not possible.
2500 /// \p Negate is true if we want this sub-tree being negated just by changing
2501 /// SETCC conditions.
2502 static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2503  AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2504  AArch64CC::CondCode Predicate) {
2505  // We're at a tree leaf, produce a conditional comparison operation.
2506  unsigned Opcode = Val->getOpcode();
2507  if (Opcode == ISD::SETCC) {
2508  SDValue LHS = Val->getOperand(0);
2509  SDValue RHS = Val->getOperand(1);
2510  ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2511  bool isInteger = LHS.getValueType().isInteger();
2512  if (Negate)
2513  CC = getSetCCInverse(CC, LHS.getValueType());
2514  SDLoc DL(Val);
2515  // Determine OutCC and handle FP special case.
2516  if (isInteger) {
2517  OutCC = changeIntCCToAArch64CC(CC);
2518  } else {
2519  assert(LHS.getValueType().isFloatingPoint());
2520  AArch64CC::CondCode ExtraCC;
2521  changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2522  // Some floating point conditions can't be tested with a single condition
2523  // code. Construct an additional comparison in this case.
2524  if (ExtraCC != AArch64CC::AL) {
2525  SDValue ExtraCmp;
2526  if (!CCOp.getNode())
2527  ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2528  else
2529  ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2530  ExtraCC, DL, DAG);
2531  CCOp = ExtraCmp;
2532  Predicate = ExtraCC;
2533  }
2534  }
2535 
2536  // Produce a normal comparison if we are first in the chain
2537  if (!CCOp)
2538  return emitComparison(LHS, RHS, CC, DL, DAG);
2539  // Otherwise produce a ccmp.
2540  return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2541  DAG);
2542  }
2543  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2544 
2545  bool IsOR = Opcode == ISD::OR;
2546 
2547  SDValue LHS = Val->getOperand(0);
2548  bool CanNegateL;
2549  bool MustBeFirstL;
2550  bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2551  assert(ValidL && "Valid conjunction/disjunction tree");
2552  (void)ValidL;
2553 
2554  SDValue RHS = Val->getOperand(1);
2555  bool CanNegateR;
2556  bool MustBeFirstR;
2557  bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2558  assert(ValidR && "Valid conjunction/disjunction tree");
2559  (void)ValidR;
2560 
2561  // Swap sub-tree that must come first to the right side.
2562  if (MustBeFirstL) {
2563  assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2564  std::swap(LHS, RHS);
2565  std::swap(CanNegateL, CanNegateR);
2566  std::swap(MustBeFirstL, MustBeFirstR);
2567  }
2568 
2569  bool NegateR;
2570  bool NegateAfterR;
2571  bool NegateL;
2572  bool NegateAfterAll;
2573  if (Opcode == ISD::OR) {
2574  // Swap the sub-tree that we can negate naturally to the left.
2575  if (!CanNegateL) {
2576  assert(CanNegateR && "at least one side must be negatable");
2577  assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2578  assert(!Negate);
2579  std::swap(LHS, RHS);
2580  NegateR = false;
2581  NegateAfterR = true;
2582  } else {
2583  // Negate the left sub-tree if possible, otherwise negate the result.
2584  NegateR = CanNegateR;
2585  NegateAfterR = !CanNegateR;
2586  }
2587  NegateL = true;
2588  NegateAfterAll = !Negate;
2589  } else {
2590  assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2591  assert(!Negate && "Valid conjunction/disjunction tree");
2592 
2593  NegateL = false;
2594  NegateR = false;
2595  NegateAfterR = false;
2596  NegateAfterAll = false;
2597  }
2598 
2599  // Emit sub-trees.
2600  AArch64CC::CondCode RHSCC;
2601  SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2602  if (NegateAfterR)
2603  RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2604  SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2605  if (NegateAfterAll)
2606  OutCC = AArch64CC::getInvertedCondCode(OutCC);
2607  return CmpL;
2608 }
2609 
2610 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
2611 /// In some cases this is even possible with OR operations in the expression.
2612 /// See \ref AArch64CCMP.
2613 /// \see emitConjunctionRec().
2614 static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2615  AArch64CC::CondCode &OutCC) {
2616  bool DummyCanNegate;
2617  bool DummyMustBeFirst;
2618  if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2619  return SDValue();
2620 
2621  return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2622 }
2623 
2624 /// @}
2625 
2626 /// Returns how profitable it is to fold a comparison's operand's shift and/or
2627 /// extension operations.
2628 static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2629  auto isSupportedExtend = [&](SDValue V) {
2630  if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2631  return true;
2632 
2633  if (V.getOpcode() == ISD::AND)
2634  if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2635  uint64_t Mask = MaskCst->getZExtValue();
2636  return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2637  }
2638 
2639  return false;
2640  };
2641 
2642  if (!Op.hasOneUse())
2643  return 0;
2644 
2645  if (isSupportedExtend(Op))
2646  return 1;
2647 
2648  unsigned Opc = Op.getOpcode();
2649  if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2650  if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2651  uint64_t Shift = ShiftCst->getZExtValue();
2652  if (isSupportedExtend(Op.getOperand(0)))
2653  return (Shift <= 4) ? 2 : 1;
2654  EVT VT = Op.getValueType();
2655  if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2656  return 1;
2657  }
2658 
2659  return 0;
2660 }
2661 
2662 static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2663  SDValue &AArch64cc, SelectionDAG &DAG,
2664  const SDLoc &dl) {
2665  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2666  EVT VT = RHS.getValueType();
2667  uint64_t C = RHSC->getZExtValue();
2668  if (!isLegalArithImmed(C)) {
2669  // Constant does not fit, try adjusting it by one?
2670  switch (CC) {
2671  default:
2672  break;
2673  case ISD::SETLT:
2674  case ISD::SETGE:
2675  if ((VT == MVT::i32 && C != 0x80000000 &&
2676  isLegalArithImmed((uint32_t)(C - 1))) ||
2677  (VT == MVT::i64 && C != 0x80000000ULL &&
2678  isLegalArithImmed(C - 1ULL))) {
2679  CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2680  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2681  RHS = DAG.getConstant(C, dl, VT);
2682  }
2683  break;
2684  case ISD::SETULT:
2685  case ISD::SETUGE:
2686  if ((VT == MVT::i32 && C != 0 &&
2687  isLegalArithImmed((uint32_t)(C - 1))) ||
2688  (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2689  CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2690  C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2691  RHS = DAG.getConstant(C, dl, VT);
2692  }
2693  break;
2694  case ISD::SETLE:
2695  case ISD::SETGT:
2696  if ((VT == MVT::i32 && C != INT32_MAX &&
2697  isLegalArithImmed((uint32_t)(C + 1))) ||
2698  (VT == MVT::i64 && C != INT64_MAX &&
2699  isLegalArithImmed(C + 1ULL))) {
2700  CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2701  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2702  RHS = DAG.getConstant(C, dl, VT);
2703  }
2704  break;
2705  case ISD::SETULE:
2706  case ISD::SETUGT:
2707  if ((VT == MVT::i32 && C != UINT32_MAX &&
2708  isLegalArithImmed((uint32_t)(C + 1))) ||
2709  (VT == MVT::i64 && C != UINT64_MAX &&
2710  isLegalArithImmed(C + 1ULL))) {
2711  CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2712  C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2713  RHS = DAG.getConstant(C, dl, VT);
2714  }
2715  break;
2716  }
2717  }
2718  }
2719 
2720  // Comparisons are canonicalized so that the RHS operand is simpler than the
2721  // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2722  // can fold some shift+extend operations on the RHS operand, so swap the
2723  // operands if that can be done.
2724  //
2725  // For example:
2726  // lsl w13, w11, #1
2727  // cmp w13, w12
2728  // can be turned into:
2729  // cmp w12, w11, lsl #1
2730  if (!isa<ConstantSDNode>(RHS) ||
2731  !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2732  SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2733 
2734  if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2735  std::swap(LHS, RHS);
2736  CC = ISD::getSetCCSwappedOperands(CC);
2737  }
2738  }
2739 
2740  SDValue Cmp;
2741  AArch64CC::CondCode AArch64CC;
2742  if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2743  const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2744 
2745  // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2746  // For the i8 operand, the largest immediate is 255, so this can be easily
2747  // encoded in the compare instruction. For the i16 operand, however, the
2748  // largest immediate cannot be encoded in the compare.
2749  // Therefore, use a sign extending load and cmn to avoid materializing the
2750  // -1 constant. For example,
2751  // movz w1, #65535
2752  // ldrh w0, [x0, #0]
2753  // cmp w0, w1
2754  // >
2755  // ldrsh w0, [x0, #0]
2756  // cmn w0, #1
2757  // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2758  // if and only if (sext LHS) == (sext RHS). The checks are in place to
2759  // ensure both the LHS and RHS are truly zero extended and to make sure the
2760  // transformation is profitable.
2761  if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2762  cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2763  cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2764  LHS.getNode()->hasNUsesOfValue(1, 0)) {
2765  int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2766  if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2767  SDValue SExt =
2768  DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2769  DAG.getValueType(MVT::i16));
2770  Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2771  RHS.getValueType()),
2772  CC, dl, DAG);
2773  AArch64CC = changeIntCCToAArch64CC(CC);
2774  }
2775  }
2776 
2777  if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2778  if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2779  if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2780  AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2781  }
2782  }
2783  }
2784 
2785  if (!Cmp) {
2786  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2787  AArch64CC = changeIntCCToAArch64CC(CC);
2788  }
2789  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2790  return Cmp;
2791 }
2792 
2793 static std::pair<SDValue, SDValue>
2794 getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2795  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2796  "Unsupported value type");
2797  SDValue Value, Overflow;
2798  SDLoc DL(Op);
2799  SDValue LHS = Op.getOperand(0);
2800  SDValue RHS = Op.getOperand(1);
2801  unsigned Opc = 0;
2802  switch (Op.getOpcode()) {
2803  default:
2804  llvm_unreachable("Unknown overflow instruction!");
2805  case ISD::SADDO:
2806  Opc = AArch64ISD::ADDS;
2807  CC = AArch64CC::VS;
2808  break;
2809  case ISD::UADDO:
2810  Opc = AArch64ISD::ADDS;
2811  CC = AArch64CC::HS;
2812  break;
2813  case ISD::SSUBO:
2814  Opc = AArch64ISD::SUBS;
2815  CC = AArch64CC::VS;
2816  break;
2817  case ISD::USUBO:
2818  Opc = AArch64ISD::SUBS;
2819  CC = AArch64CC::LO;
2820  break;
2821  // Multiply needs a little bit extra work.
2822  case ISD::SMULO:
2823  case ISD::UMULO: {
2824  CC = AArch64CC::NE;
2825  bool IsSigned = Op.getOpcode() == ISD::SMULO;
2826  if (Op.getValueType() == MVT::i32) {
2827  unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2828  // For a 32 bit multiply with overflow check we want the instruction
2829  // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2830  // need to generate the following pattern:
2831  // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
2832  LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2833  RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2834  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2835  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2836  DAG.getConstant(0, DL, MVT::i64));
2837  // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2838  // operation. We need to clear out the upper 32 bits, because we used a
2839  // widening multiply that wrote all 64 bits. In the end this should be a
2840  // noop.
2841  Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2842  if (IsSigned) {
2843  // The signed overflow check requires more than just a simple check for
2844  // any bit set in the upper 32 bits of the result. These bits could be
2845  // just the sign bits of a negative number. To perform the overflow
2846  // check we arithmetic-shift-right the lower 32 bits of the result by 31,
2847  // replicating bit 31, and compare that against the upper 32 bits.
2848  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2849  DAG.getConstant(32, DL, MVT::i64));
2850  UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2851  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2852  DAG.getConstant(31, DL, MVT::i64));
2853  // It is important that LowerBits is last, otherwise the arithmetic
2854  // shift will not be folded into the compare (SUBS).
2855  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2856  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2857  .getValue(1);
2858  } else {
2859  // The overflow check for unsigned multiply is easy. We only need to
2860  // check if any of the upper 32 bits are set. This can be done with a
2861  // CMP (shifted register). For that we need to generate the following
2862  // pattern:
2863  // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
2864  SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2865  DAG.getConstant(32, DL, MVT::i64));
2866  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2867  Overflow =
2868  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2869  DAG.getConstant(0, DL, MVT::i64),
2870  UpperBits).getValue(1);
2871  }
2872  break;
2873  }
2874  assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2875  // For the 64-bit multiply, compute the low half of the product directly.
2876  Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2877  if (IsSigned) {
2878  SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2879  SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2880  DAG.getConstant(63, DL, MVT::i64));
2881  // It is important that LowerBits is last, otherwise the arithmetic
2882  // shift will not be folded into the compare (SUBS).
2883  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2884  Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2885  .getValue(1);
2886  } else {
2887  SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2888  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2889  Overflow =
2890  DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2891  DAG.getConstant(0, DL, MVT::i64),
2892  UpperBits).getValue(1);
2893  }
2894  break;
2895  }
2896  } // switch (...)
2897 
2898  if (Opc) {
2899  SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2900 
2901  // Emit the AArch64 operation with overflow check.
2902  Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2903  Overflow = Value.getValue(1);
2904  }
2905  return std::make_pair(Value, Overflow);
2906 }
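
// Illustrative example, not part of the original source: a 32-bit signed add
// with an overflow check, e.g.
//   bool f(int a, int b, int *r) { return __builtin_sadd_overflow(a, b, r); }
// takes the ISD::SADDO path above (ADDS plus condition code VS) and is
// typically selected to something like
//   adds w8, w0, w1
//   cset w9, vs
// though exact registers and scheduling depend on the surrounding code.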
2907 
2908 SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
2909  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
2910  return LowerToScalableOp(Op, DAG);
2911 
2912  SDValue Sel = Op.getOperand(0);
2913  SDValue Other = Op.getOperand(1);
2914  SDLoc dl(Sel);
2915 
2916  // If the operand is an overflow checking operation, invert the condition
2917  // code and kill the Not operation. I.e., transform:
2918  // (xor overflow_op_bool, 1)
2919  // -->
2920  // (csel 1, 0, invert(cc), overflow_op_bool)
2921  // ... which later gets transformed to just a cset instruction with an
2922  // inverted condition code, rather than a cset + eor sequence.
2923  if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
2924  // Only lower legal XALUO ops.
2925  if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2926  return SDValue();
2927 
2928  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2929  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2930  AArch64CC::CondCode CC;
2931  SDValue Value, Overflow;
2932  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2933  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2934  return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2935  CCVal, Overflow);
2936  }
2937  // If neither operand is a SELECT_CC, give up.
2938  if (Sel.getOpcode() != ISD::SELECT_CC)
2939  std::swap(Sel, Other);
2940  if (Sel.getOpcode() != ISD::SELECT_CC)
2941  return Op;
2942 
2943  // The folding we want to perform is:
2944  // (xor x, (select_cc a, b, cc, 0, -1) )
2945  // -->
2946  // (csel x, (xor x, -1), cc ...)
2947  //
2948  // The latter will get matched to a CSINV instruction.
2949 
2950  ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2951  SDValue LHS = Sel.getOperand(0);
2952  SDValue RHS = Sel.getOperand(1);
2953  SDValue TVal = Sel.getOperand(2);
2954  SDValue FVal = Sel.getOperand(3);
2955 
2956  // FIXME: This could be generalized to non-integer comparisons.
2957  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2958  return Op;
2959 
2960  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2961  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2962 
2963  // The values aren't constants, this isn't the pattern we're looking for.
2964  if (!CFVal || !CTVal)
2965  return Op;
2966 
2967  // We can commute the SELECT_CC by inverting the condition. This
2968  // might be needed to make this fit into a CSINV pattern.
2969  if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2970  std::swap(TVal, FVal);
2971  std::swap(CTVal, CFVal);
2972  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2973  }
2974 
2975  // If the constants line up, perform the transform!
2976  if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2977  SDValue CCVal;
2978  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2979 
2980  FVal = Other;
2981  TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2982  DAG.getConstant(-1ULL, dl, Other.getValueType()));
2983 
2984  return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2985  CCVal, Cmp);
2986  }
2987 
2988  return Op;
2989 }
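
// Illustrative example, not part of the original source: IR along the lines of
//   %m = select i1 %c, i32 0, i32 -1
//   %r = xor i32 %x, %m
// matches the (xor x, (select_cc ..., 0, -1)) pattern handled above and can be
// selected to roughly
//   cmp   w1, w2
//   csinv w0, w0, w0, <cond>
// rather than materializing the all-ones mask and emitting a separate eor.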
2990 
2991 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2992  EVT VT = Op.getValueType();
2993 
2994  // Let legalize expand this if it isn't a legal type yet.
2995  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2996  return SDValue();
2997 
2998  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2999 
3000  unsigned Opc;
3001  bool ExtraOp = false;
3002  switch (Op.getOpcode()) {
3003  default:
3004  llvm_unreachable("Invalid code");
3005  case ISD::ADDC:
3006  Opc = AArch64ISD::ADDS;
3007  break;
3008  case ISD::SUBC:
3009  Opc = AArch64ISD::SUBS;
3010  break;
3011  case ISD::ADDE:
3012  Opc = AArch64ISD::ADCS;
3013  ExtraOp = true;
3014  break;
3015  case ISD::SUBE:
3016  Opc = AArch64ISD::SBCS;
3017  ExtraOp = true;
3018  break;
3019  }
3020 
3021  if (!ExtraOp)
3022  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3023  return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3024  Op.getOperand(2));
3025 }
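
// Illustrative example, not part of the original source: a 128-bit integer
// addition is legalized into an ADDC/ADDE pair, which the lowering above maps
// onto the flag-setting and flag-consuming AArch64 nodes, e.g.
//   adds x0, x0, x2   // ISD::ADDC -> AArch64ISD::ADDS
//   adcs x1, x1, x3   // ISD::ADDE -> AArch64ISD::ADCS (adc if the carry-out
//                     // is unused)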
3026 
3027 static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3028  // Let legalize expand this if it isn't a legal type yet.
3029  if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3030  return SDValue();
3031 
3032  SDLoc dl(Op);
3033  AArch64CC::CondCode CC;
3034  // The actual operation that sets the overflow or carry flag.
3035  SDValue Value, Overflow;
3036  std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3037 
3038  // We use 0 and 1 as false and true values.
3039  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3040  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3041 
3042  // We use an inverted condition, because the conditional select is inverted
3043  // too. This will allow it to be selected to a single instruction:
3044  // CSINC Wd, WZR, WZR, invert(cond).
3045  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3046  Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3047  CCVal, Overflow);
3048 
3049  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3050  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3051 }
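
// Illustrative example, not part of the original source: when only the
// overflow flag of an llvm.sadd.with.overflow result is consumed, the inverted
// CSEL built above folds into a single conditional set, e.g.
//   adds w8, w0, w1
//   cset w0, vs       // CSINC WZR, WZR with the inverted condition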
3052 
3053 // Prefetch operands are:
3054 // 1: Address to prefetch
3055 // 2: bool isWrite
3056 // 3: int locality (0 = no locality ... 3 = extreme locality)
3057 // 4: bool isDataCache
3058 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3059  SDLoc DL(Op);
3060  unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3061  unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3062  unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3063 
3064  bool IsStream = !Locality;
3065  // When a locality level was specified (i.e. this is not a streaming prefetch):
3066  if (Locality) {
3067  // The front-end should have filtered out the out-of-range values
3068  assert(Locality <= 3 && "Prefetch locality out-of-range");
3069  // A higher locality hint means the data should be kept closer to the core,
3070  // but the prfm encoding numbers cache levels the other way around
3071  // (0 selects L1), so invert the value here.
3072  Locality = 3 - Locality;
3073  }
3074 
3075  // Build the mask value encoding the expected behavior.
3076  unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3077  (!IsData << 3) | // IsDataCache bit
3078  (Locality << 1) | // Cache level bits
3079  (unsigned)IsStream; // Stream bit
3080  return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3081  DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3082 }
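
// Illustrative example, not part of the original source: a data prefetch with
// IsWrite=0, Locality=3, IsData=1 encodes as
//   PrfOp = (0 << 4) | (0 << 3) | ((3 - 3) << 1) | 0 = 0b00000  (PLDL1KEEP)
// while IsWrite=1, Locality=0, IsData=1 gives
//   PrfOp = (1 << 4) | (0 << 3) | (0 << 1) | 1 = 0b10001        (PSTL1STRM)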
3083 
3084 SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3085  SelectionDAG &DAG) const {
3086  if (Op.getValueType().isScalableVector())
3087  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3088 
3089  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3090  return SDValue();
3091 }
3092 
3093 SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3094  SelectionDAG &DAG) const {
3095  if (Op.getValueType().isScalableVector())
3096  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3097 
3098  bool IsStrict = Op->isStrictFPOpcode();
3099  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3100  EVT SrcVT = SrcVal.getValueType();
3101 
3102  if (SrcVT != MVT::f128) {
3103  // Expand cases where the input is a vector bigger than NEON.
3104  if (useSVEForFixedLengthVectorVT(SrcVT))
3105  return SDValue();
3106 
3107  // It's legal except when f128 is involved
3108  return Op;
3109  }
3110 
3111  return SDValue();
3112 }
3113 
3114 SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3115  SelectionDAG &DAG) const {
3116  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3117  // Any additional optimization in this function should be recorded
3118  // in the cost tables.
3119  EVT InVT = Op.getOperand(0).getValueType();
3120  EVT VT = Op.getValueType();
3121 
3122  if (VT.isScalableVector()) {
3123  unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3124  ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3125  : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3126  return LowerToPredicatedOp(Op, DAG, Opcode);
3127  }
3128 
3129  unsigned NumElts = InVT.getVectorNumElements();
3130 
3131  // f16 conversions are promoted to f32 when full fp16 is not supported.
3132  if (InVT.getVectorElementType() == MVT::f16 &&
3133  !Subtarget->hasFullFP16()) {
3134  MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3135  SDLoc dl(Op);
3136  return DAG.getNode(
3137  Op.getOpcode(), dl, Op.getValueType(),
3138  DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3139  }
3140 
3141  uint64_t VTSize = VT.getFixedSizeInBits();
3142  uint64_t InVTSize = InVT.getFixedSizeInBits();
3143  if (VTSize < InVTSize) {
3144  SDLoc dl(Op);
3145  SDValue Cv =
3146  DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3147  Op.getOperand(0));
3148  return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3149  }
3150 
3151  if (VTSize > InVTSize) {
3152  SDLoc dl(Op);
3153  MVT ExtVT =
3155  VT.getVectorNumElements());
3156  SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3157  return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3158  }
3159 
3160  // Type changing conversions are illegal.
3161  return Op;
3162 }
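
// Illustrative example, not part of the original source: without full fp16
// support, a conversion such as
//   fptosi <4 x half> %v to <4 x i32>
// is rewritten above to go through <4 x float> and typically ends up as
//   fcvtl  v0.4s, v0.4h
//   fcvtzs v0.4s, v0.4s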
3163 
3164 SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3165  SelectionDAG &DAG) const {
3166  bool IsStrict = Op->isStrictFPOpcode();
3167  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3168 
3169  if (SrcVal.getValueType().isVector())
3170  return LowerVectorFP_TO_INT(Op, DAG);
3171 
3172  // f16 conversions are promoted to f32 when full fp16 is not supported.
3173  if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3174  assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3175  SDLoc dl(Op);
3176  return DAG.getNode(
3177  Op.getOpcode(), dl, Op.getValueType(),
3178  DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3179  }
3180 
3181  if (SrcVal.getValueType() != MVT::f128) {
3182  // It's legal except when f128 is involved
3183  return Op;
3184  }
3185 
3186  return SDValue();
3187 }
3188 
3189 SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3190  SelectionDAG &DAG) const {
3191  // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3192  // Any additional optimization in this function should be recorded
3193  // in the cost tables.
3194  EVT VT = Op.getValueType();
3195  SDLoc dl(Op);
3196  SDValue In = Op.getOperand(0);
3197  EVT InVT = In.getValueType();
3198  unsigned Opc = Op.getOpcode();
3199  bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3200 
3201  if (VT.isScalableVector()) {
3202  if (InVT.getVectorElementType() == MVT::i1) {
3203  // An SVE predicate can't be converted directly; extend it to an integer vector first.
3204  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3205  EVT CastVT = getPromotedVTForPredicate(InVT);
3206  In = DAG.getNode(CastOpc, dl, CastVT, In);
3207  return DAG.getNode(Opc, dl, VT, In);
3208  }
3209 
3210  unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3211  : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3212  return LowerToPredicatedOp(Op, DAG, Opcode);
3213  }
3214 
3215  uint64_t VTSize = VT.getFixedSizeInBits();
3216  uint64_t InVTSize = InVT.getFixedSizeInBits();
3217  if (VTSize < InVTSize) {
3218  MVT CastVT =
3220  InVT.getVectorNumElements());
3221  In = DAG.getNode(Opc, dl, CastVT, In);
3222  return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3223  }
3224 
3225  if (VTSize > InVTSize) {
3226  unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3227  EVT CastVT = VT.changeVectorElementTypeToInteger();
3228  In = DAG.getNode(CastOpc, dl, CastVT, In);
3229  return DAG.getNode(Opc, dl, VT, In);
3230  }
3231 
3232  return Op;
3233 }
3234 
3235 SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3236  SelectionDAG &DAG) const {
3237  if (Op.getValueType().isVector())
3238  return LowerVectorINT_TO_FP(Op, DAG);
3239 
3240  bool IsStrict = Op->isStrictFPOpcode();
3241  SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3242 
3243  // f16 conversions are promoted to f32 when full fp16 is not supported.
3244  if (Op.getValueType() == MVT::f16 &&
3245  !Subtarget->hasFullFP16()) {
3246  assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3247  SDLoc dl(Op);
3248  return DAG.getNode(
3249  ISD::FP_ROUND, dl, MVT::f16,
3250  DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3251  DAG.getIntPtrConstant(0, dl));
3252  }
3253 
3254  // i128 conversions are libcalls.
3255  if (SrcVal.getValueType() == MVT::i128)
3256  return SDValue();
3257 
3258  // Other conversions are legal, unless it's to the completely software-based
3259  // fp128.
3260  if (Op.getValueType() != MVT::f128)
3261  return Op;
3262  return SDValue();
3263 }
3264 
3265 SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3266  SelectionDAG &DAG) const {
3267  // For iOS, we want to call an alternative entry point: __sincos_stret,
3268  // which returns the values in two S / D registers.
3269  SDLoc dl(Op);
3270  SDValue Arg = Op.getOperand(0);
3271  EVT ArgVT = Arg.getValueType();
3272  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3273 
3274  ArgListTy Args;
3275  ArgListEntry Entry;
3276 
3277  Entry.Node = Arg;
3278  Entry.Ty = ArgTy;
3279  Entry.IsSExt = false;
3280  Entry.IsZExt = false;
3281  Args.push_back(Entry);
3282 
3283  RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3284  : RTLIB::SINCOS_STRET_F32;
3285  const char *LibcallName = getLibcallName(LC);
3286  SDValue Callee =
3287  DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3288 
3289  StructType *RetTy = StructType::get(ArgTy, ArgTy);
3290  TargetLowering::CallLoweringInfo CLI(DAG);
3291  CLI.setDebugLoc(dl)
3292  .setChain(DAG.getEntryNode())
3293  .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3294 
3295  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3296  return CallResult.first;
3297 }
3298 
3300  EVT OpVT = Op.getValueType();
3301  if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3302  return SDValue();
3303 
3304  assert(Op.getOperand(0).getValueType() == MVT::i16);
3305  SDLoc DL(Op);
3306 
3307  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3308  Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3309  return SDValue(
3310  DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3311  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3312  0);
3313 }
3314 
3315 static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3316  if (OrigVT.getSizeInBits() >= 64)
3317  return OrigVT;
3318 
3319  assert(OrigVT.isSimple() && "Expecting a simple value type");
3320 
3321  MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3322  switch (OrigSimpleTy) {
3323  default: llvm_unreachable("Unexpected Vector Type");
3324  case MVT::v2i8:
3325  case MVT::v2i16:
3326  return MVT::v2i32;
3327  case MVT::v4i8:
3328  return MVT::v4i16;
3329  }
3330 }
3331 
3332 static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3333  const EVT &OrigTy,
3334  const EVT &ExtTy,
3335  unsigned ExtOpcode) {
3336  // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3337  // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3338  // 64-bits we need to insert a new extension so that it will be 64-bits.
3339  assert(ExtTy.is128BitVector() && "Unexpected extension size");
3340  if (OrigTy.getSizeInBits() >= 64)
3341  return N;
3342 
3343  // Must extend size to at least 64 bits to be used as an operand for VMULL.
3344  EVT NewVT = getExtensionTo64Bits(OrigTy);
3345 
3346  return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3347 }
3348 
3349 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3350  bool isSigned) {
3351  EVT VT = N->getValueType(0);
3352 
3353  if (N->getOpcode() != ISD::BUILD_VECTOR)
3354  return false;
3355 
3356  for (const SDValue &Elt : N->op_values()) {
3357  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3358  unsigned EltSize = VT.getScalarSizeInBits();
3359  unsigned HalfSize = EltSize / 2;
3360  if (isSigned) {
3361  if (!isIntN(HalfSize, C->getSExtValue()))
3362  return false;
3363  } else {
3364  if (!isUIntN(HalfSize, C->getZExtValue()))
3365  return false;
3366  }
3367  continue;
3368  }
3369  return false;
3370  }
3371 
3372  return true;
3373 }
3374 
3375 static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3376  if (N->getOpcode() == ISD::SIGN_EXTEND ||
3377  N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3378  return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3379  N->getOperand(0)->getValueType(0),
3380  N->getValueType(0),
3381  N->getOpcode());
3382 
3383  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3384  EVT VT = N->getValueType(0);
3385  SDLoc dl(N);
3386  unsigned EltSize = VT.getScalarSizeInBits() / 2;
3387  unsigned NumElts = VT.getVectorNumElements();
3388  MVT TruncVT = MVT::getIntegerVT(EltSize);
3389  SmallVector<SDValue, 8> Ops;
3390  for (unsigned i = 0; i != NumElts; ++i) {
3391  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3392  const APInt &CInt = C->getAPIntValue();
3393  // Element types smaller than 32 bits are not legal, so use i32 elements.
3394  // The values are implicitly truncated so sext vs. zext doesn't matter.
3395  Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3396  }
3397  return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3398 }
3399 
3400 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3401  return N->getOpcode() == ISD::SIGN_EXTEND ||
3402  N->getOpcode() == ISD::ANY_EXTEND ||
3403  isExtendedBUILD_VECTOR(N, DAG, true);
3404 }
3405 
3406 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3407  return N->getOpcode() == ISD::ZERO_EXTEND ||
3408  N->getOpcode() == ISD::ANY_EXTEND ||
3409  isExtendedBUILD_VECTOR(N, DAG, false);
3410 }
3411 
3412 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3413  unsigned Opcode = N->getOpcode();
3414  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3415  SDNode *N0 = N->getOperand(0).getNode();
3416  SDNode *N1 = N->getOperand(1).getNode();
3417  return N0->hasOneUse() && N1->hasOneUse() &&
3418  isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3419  }
3420  return false;
3421 }
3422 
3423 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3424  unsigned Opcode = N->getOpcode();
3425  if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3426  SDNode *N0 = N->getOperand(0).getNode();
3427  SDNode *N1 = N->getOperand(1).getNode();
3428  return N0->hasOneUse() && N1->hasOneUse() &&
3429  isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3430  }
3431  return false;
3432 }
3433 
3434 SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3435  SelectionDAG &DAG) const {
3436  // The rounding mode is in bits 23:22 of the FPCR.
3437  // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3438  // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
3439  // so that the shift + and get folded into a bitfield extract.
3440  SDLoc dl(Op);
3441 
3442  SDValue Chain = Op.getOperand(0);
3443  SDValue FPCR_64 = DAG.getNode(
3444  ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3445  {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3446  Chain = FPCR_64.getValue(1);
3447  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3448  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3449  DAG.getConstant(1U << 22, dl, MVT::i32));
3450  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3451  DAG.getConstant(22, dl, MVT::i32));
3452  SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3453  DAG.getConstant(3, dl, MVT::i32));
3454  return DAG.getMergeValues({AND, Chain}, dl);
3455 }
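
// Illustrative example, not part of the original source: if FPCR.RMode is
// 0b11 (round toward zero), the computation above gives ((3 + 1) & 3) = 0,
// the FLT_ROUNDS value for "toward zero"; RMode 0b00 (round to nearest)
// gives 1, matching the 0->1, 1->2, 2->3, 3->0 mapping described above.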
3456 
3457 SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3458  EVT VT = Op.getValueType();
3459 
3460  // If SVE is available then i64 vector multiplications can also be made legal.
3461  bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3462 
3463  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3464  return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3465 
3466  // Multiplications are only custom-lowered for 128-bit vectors so that
3467  // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3468  assert(VT.is128BitVector() && VT.isInteger() &&
3469  "unexpected type for custom-lowering ISD::MUL");
3470  SDNode *N0 = Op.getOperand(0).getNode();
3471  SDNode *N1 = Op.getOperand(1).getNode();
3472  unsigned NewOpc = 0;
3473  bool isMLA = false;
3474  bool isN0SExt = isSignExtended(N0, DAG);
3475  bool isN1SExt = isSignExtended(N1, DAG);
3476  if (isN0SExt && isN1SExt)
3477  NewOpc = AArch64ISD::SMULL;
3478  else {
3479  bool isN0ZExt = isZeroExtended(N0, DAG);
3480  bool isN1ZExt = isZeroExtended(N1, DAG);
3481  if (isN0ZExt && isN1ZExt)
3482  NewOpc = AArch64ISD::UMULL;
3483  else if (isN1SExt || isN1ZExt) {
3484  // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3485  // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3486  if (isN1SExt && isAddSubSExt(N0, DAG)) {
3487  NewOpc = AArch64ISD::SMULL;
3488  isMLA = true;
3489  } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3490  NewOpc = AArch64ISD::UMULL;
3491  isMLA = true;
3492  } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3493  std::swap(N0, N1);
3494  NewOpc = AArch64ISD::UMULL;
3495  isMLA = true;
3496  }
3497  }
3498 
3499  if (!NewOpc) {
3500  if (VT == MVT::v2i64)
3501  // Fall through to expand this. It is not legal.
3502  return SDValue();
3503  else
3504  // Other vector multiplications are legal.
3505  return Op;
3506  }
3507  }
3508 
3509  // Legalize to a S/UMULL instruction
3510  SDLoc DL(Op);
3511  SDValue Op0;
3512  SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3513  if (!isMLA) {
3514  Op0 = skipExtensionForVectorMULL(N0, DAG);
3515  assert(Op0.getValueType().is64BitVector() &&
3516  Op1.getValueType().is64BitVector() &&
3517  "unexpected types for extended operands to VMULL");
3518  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3519  }
3520  // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
3521  // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
3522  // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
3523  SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3524  SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3525  EVT Op1VT = Op1.getValueType();
3526  return DAG.getNode(N0->getOpcode(), DL, VT,
3527  DAG.getNode(NewOpc, DL, VT,
3528  DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3529  DAG.getNode(NewOpc, DL, VT,
3530  DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3531 }
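
// Illustrative example, not part of the original source: IR such as
//   %a64 = sext <2 x i32> %a to <2 x i64>
//   %b64 = sext <2 x i32> %b to <2 x i64>
//   %p   = mul <2 x i64> %a64, %b64
// is recognized above as AArch64ISD::SMULL and selected to a single
//   smull v0.2d, v0.2s, v1.2s
// instead of being scalarized (v2i64 multiplies are not otherwise legal).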
3532 
3533 static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3534  int Pattern) {
3535  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3536  DAG.getTargetConstant(Pattern, DL, MVT::i32));
3537 }
3538 
3539 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3540  SelectionDAG &DAG) const {
3541  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3542  SDLoc dl(Op);
3543  switch (IntNo) {
3544  default: return SDValue(); // Don't custom lower most intrinsics.
3545  case Intrinsic::thread_pointer: {
3546  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3547  return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3548  }
3549  case Intrinsic::aarch64_neon_abs: {
3550  EVT Ty = Op.getValueType();
3551  if (Ty == MVT::i64) {
3552  SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3553  Op.getOperand(1));
3554  Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3555  return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3556  } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3557  return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3558  } else {
3559  report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3560  }
3561  }
3562  case Intrinsic::aarch64_neon_smax:
3563  return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3564  Op.getOperand(1), Op.getOperand(2));
3565  case Intrinsic::aarch64_neon_umax:
3566  return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3567  Op.getOperand(1), Op.getOperand(2));
3568  case Intrinsic::aarch64_neon_smin:
3569  return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3570  Op.getOperand(1), Op.getOperand(2));
3571  case Intrinsic::aarch64_neon_umin:
3572  return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3573  Op.getOperand(1), Op.getOperand(2));
3574 
3575  case Intrinsic::aarch64_sve_sunpkhi:
3576  return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3577  Op.getOperand(1));
3578  case Intrinsic::aarch64_sve_sunpklo:
3579  return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3580  Op.getOperand(1));
3581  case Intrinsic::aarch64_sve_uunpkhi:
3582  return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3583  Op.getOperand(1));
3584  case Intrinsic::aarch64_sve_uunpklo:
3585  return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3586  Op.getOperand(1));
3587  case Intrinsic::aarch64_sve_clasta_n:
3588  return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3589  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3590  case Intrinsic::aarch64_sve_clastb_n:
3591  return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3592  Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3593  case Intrinsic::aarch64_sve_lasta:
3594  return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3595  Op.getOperand(1), Op.getOperand(2));
3596  case Intrinsic::aarch64_sve_lastb:
3597  return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3598  Op.getOperand(1), Op.getOperand(2));
3599  case Intrinsic::aarch64_sve_rev:
3600  return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
3601  Op.getOperand(1));
3602  case Intrinsic::aarch64_sve_tbl:
3603  return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3604  Op.getOperand(1), Op.getOperand(2));
3605  case Intrinsic::aarch64_sve_trn1:
3606  return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3607  Op.getOperand(1), Op.getOperand(2));
3608  case Intrinsic::aarch64_sve_trn2:
3609  return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3610  Op.getOperand(1), Op.getOperand(2));
3611  case Intrinsic::aarch64_sve_uzp1:
3612  return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3613  Op.getOperand(1), Op.getOperand(2));
3614  case Intrinsic::aarch64_sve_uzp2:
3615  return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3616  Op.getOperand(1), Op.getOperand(2));
3617  case Intrinsic::aarch64_sve_zip1:
3618  return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3619  Op.getOperand(1), Op.getOperand(2));
3620  case Intrinsic::aarch64_sve_zip2:
3621  return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3622  Op.getOperand(1), Op.getOperand(2));
3623  case Intrinsic::aarch64_sve_ptrue:
3624  return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3625  Op.getOperand(1));
3626  case Intrinsic::aarch64_sve_clz:
3627  return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
3628  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3629  case Intrinsic::aarch64_sve_cnt: {
3630  SDValue Data = Op.getOperand(3);
3631  // CTPOP only supports integer operands.
3632  if (Data.getValueType().isFloatingPoint())
3633  Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
3634  return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
3635  Op.getOperand(2), Data, Op.getOperand(1));
3636  }
3637  case Intrinsic::aarch64_sve_dupq_lane:
3638  return LowerDUPQLane(Op, DAG);
3639  case Intrinsic::aarch64_sve_convert_from_svbool:
3640  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3641  Op.getOperand(1));
3642  case Intrinsic::aarch64_sve_fneg:
3643  return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3644  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3645  case Intrinsic::aarch64_sve_frintp:
3646  return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
3647  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3648  case Intrinsic::aarch64_sve_frintm:
3649  return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
3650  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3651  case Intrinsic::aarch64_sve_frinti:
3652  return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3653  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3654  case Intrinsic::aarch64_sve_frintx:
3655  return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
3656  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3657  case Intrinsic::aarch64_sve_frinta:
3658  return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
3659  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3660  case Intrinsic::aarch64_sve_frintn:
3661  return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
3662  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3663  case Intrinsic::aarch64_sve_frintz:
3664  return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
3665  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3666  case Intrinsic::aarch64_sve_ucvtf:
3667  return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
3668  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3669  Op.getOperand(1));
3670  case Intrinsic::aarch64_sve_scvtf:
3671  return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
3672  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3673  Op.getOperand(1));
3674  case Intrinsic::aarch64_sve_fcvtzu:
3675  return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
3676  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3677  Op.getOperand(1));
3678  case Intrinsic::aarch64_sve_fcvtzs:
3679  return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
3680  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3681  Op.getOperand(1));
3682  case Intrinsic::aarch64_sve_fsqrt:
3683  return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
3684  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3685  case Intrinsic::aarch64_sve_frecpx:
3686  return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
3687  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3688  case Intrinsic::aarch64_sve_fabs:
3689  return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3690  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3691  case Intrinsic::aarch64_sve_abs:
3692  return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
3693  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3694  case Intrinsic::aarch64_sve_neg:
3695  return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
3696  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3697  case Intrinsic::aarch64_sve_convert_to_svbool: {
3698  EVT OutVT = Op.getValueType();
3699  EVT InVT = Op.getOperand(1).getValueType();
3700  // Return the operand if the cast isn't changing type,
3701  // i.e. <n x 16 x i1> -> <n x 16 x i1>
3702  if (InVT == OutVT)
3703  return Op.getOperand(1);
3704  // Otherwise, zero the newly introduced lanes.
3705  SDValue Reinterpret =
3706  DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
3707  SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
3708  SDValue MaskReinterpret =
3709  DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
3710  return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
3711  }
3712 
3713  case Intrinsic::aarch64_sve_insr: {
3714  SDValue Scalar = Op.getOperand(2);
3715  EVT ScalarTy = Scalar.getValueType();
3716  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
3717  Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
3718 
3719  return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
3720  Op.getOperand(1), Scalar);
3721  }
3722  case Intrinsic::aarch64_sve_rbit:
3723  return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
3724  Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
3725  Op.getOperand(1));
3726  case Intrinsic::aarch64_sve_revb:
3727  return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
3728  Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
3729  case Intrinsic::aarch64_sve_sxtb:
3730  return DAG.getNode(
3732  Op.getOperand(2), Op.getOperand(3),
3733  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3734  Op.getOperand(1));
3735  case Intrinsic::aarch64_sve_sxth:
3736  return DAG.getNode(
3738  Op.getOperand(2), Op.getOperand(3),
3739  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3740  Op.getOperand(1));
3741  case Intrinsic::aarch64_sve_sxtw:
3742  return DAG.getNode(
3744  Op.getOperand(2), Op.getOperand(3),
3745  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3746  Op.getOperand(1));
3747  case Intrinsic::aarch64_sve_uxtb:
3748  return DAG.getNode(
3750  Op.getOperand(2), Op.getOperand(3),
3751  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
3752  Op.getOperand(1));
3753  case Intrinsic::aarch64_sve_uxth:
3754  return DAG.getNode(
3756  Op.getOperand(2), Op.getOperand(3),
3757  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
3758  Op.getOperand(1));
3759  case Intrinsic::aarch64_sve_uxtw:
3760  return DAG.getNode(
3762  Op.getOperand(2), Op.getOperand(3),
3763  DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
3764  Op.getOperand(1));
3765 
3766  case Intrinsic::localaddress: {
3767  const auto &MF = DAG.getMachineFunction();
3768  const auto *RegInfo = Subtarget->getRegisterInfo();
3769  unsigned Reg = RegInfo->getLocalAddressRegister(MF);
3770  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
3771  Op.getSimpleValueType());
3772  }
3773 
3774  case Intrinsic::eh_recoverfp: {
3775  // FIXME: This needs to be implemented to correctly handle highly aligned
3776  // stack objects. For now we simply return the incoming FP. Refer D53541
3777  // for more details.
3778  SDValue FnOp = Op.getOperand(1);
3779  SDValue IncomingFPOp = Op.getOperand(2);
3780  GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
3781  auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
3782  if (!Fn)
3783  report_fatal_error(
3784  "llvm.eh.recoverfp must take a function as the first argument");
3785  return IncomingFPOp;
3786  }
3787 
3788  case Intrinsic::aarch64_neon_vsri:
3789  case Intrinsic::aarch64_neon_vsli: {
3790  EVT Ty = Op.getValueType();
3791 
3792  if (!Ty.isVector())
3793  report_fatal_error("Unexpected type for aarch64_neon_vsli");
3794 
3795  assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
3796 
3797  bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
3798  unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
3799  return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
3800  Op.getOperand(3));
3801  }
3802 
3803  case Intrinsic::aarch64_neon_srhadd:
3804  case Intrinsic::aarch64_neon_urhadd:
3805  case Intrinsic::aarch64_neon_shadd:
3806  case Intrinsic::aarch64_neon_uhadd: {
3807  bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3808  IntNo == Intrinsic::aarch64_neon_shadd);
3809  bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
3810  IntNo == Intrinsic::aarch64_neon_urhadd);
3811  unsigned Opcode =
3812  IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
3813  : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
3814  return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
3815  Op.getOperand(2));
3816  }
3817 
3818  case Intrinsic::aarch64_neon_uabd: {
3819  return DAG.getNode(AArch64ISD::UABD, dl, Op.getValueType(),
3820  Op.getOperand(1), Op.getOperand(2));
3821  }
3822  case Intrinsic::aarch64_neon_sabd: {
3823  return DAG.getNode(AArch64ISD::SABD, dl, Op.getValueType(),
3824  Op.getOperand(1), Op.getOperand(2));
3825  }
3826  }
3827 }
3828 
3829 bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
3830  if (VT.getVectorElementType() == MVT::i32 &&
3832  return true;
3833 
3834  return false;
3835 }
3836 
3837 bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3838  return ExtVal.getValueType().isScalableVector();
3839 }
3840 
3841 unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
3842  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
3843  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
3845  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
3847  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
3849  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
3851  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
3853  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
3855  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
3857  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
3859  };
3860  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
3861  return AddrModes.find(Key)->second;
3862 }
3863 
3864 unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
3865  std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
3866  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
3868  {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
3870  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
3872  {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
3874  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
3876  {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
3878  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
3880  {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
3882  };
3883  auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
3884  return AddrModes.find(Key)->second;
3885 }
3886 
3887 unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
3888  switch (Opcode) {
3889  default:
3890  llvm_unreachable("unimplemented opcode");
3891  return Opcode;
3906  }
3907 }
3908 
3909 static bool getGatherScatterIndexIsExtended(SDValue Index) {
3910  unsigned Opcode = Index.getOpcode();
3911  if (Opcode == ISD::SIGN_EXTEND_INREG)
3912  return true;
3913 
3914  if (Opcode == ISD::AND) {
3915  SDValue Splat = Index.getOperand(1);
3916  if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
3917  return false;
3918  ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
3919  if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
3920  return false;
3921  return true;
3922  }
3923 
3924  return false;
3925 }
3926 
3927 // If the base pointer of a masked gather or scatter is null, we
3928 // may be able to swap BasePtr & Index and use the vector + register
3929 // or vector + immediate addressing mode, e.g.
3930 // VECTOR + REGISTER:
3931 // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
3932 // -> getelementptr %offset, <vscale x N x T> %indices
3933 // VECTOR + IMMEDIATE:
3934 // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
3935 // -> getelementptr #x, <vscale x N x T> %indices
3936 static void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
3937  unsigned &Opcode, bool IsGather,
3938  SelectionDAG &DAG) {
3939  if (!isNullConstant(BasePtr))
3940  return;
3941 
3942  ConstantSDNode *Offset = nullptr;
3943  if (Index.getOpcode() == ISD::ADD)
3944  if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
3945  if (isa<ConstantSDNode>(SplatVal))
3946  Offset = cast<ConstantSDNode>(SplatVal);
3947  else {
3948  BasePtr = SplatVal;
3949  Index = Index->getOperand(0);
3950  return;
3951  }
3952  }
3953 
3954  unsigned NewOp =
3955  IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
3956 
3957  if (!Offset) {
3958  std::swap(BasePtr, Index);
3959  Opcode = NewOp;
3960  return;
3961  }
3962 
3963  uint64_t OffsetVal = Offset->getZExtValue();
3964  unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
3965  auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
3966 
3967  if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
3968  // Index is out of range for the immediate addressing mode
3969  BasePtr = ConstOffset;
3970  Index = Index->getOperand(0);
3971  return;
3972  }
3973 
3974  // Immediate is in range
3975  Opcode = NewOp;
3976  BasePtr = Index->getOperand(0);
3977  Index = ConstOffset;
3978 }
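
// Illustrative example, not part of the original source: a gather of i32
// elements with a null base pointer and index (splat(16) + %addrs) passes the
// range check above (16 is a multiple of 4 and 16/4 <= 31), so it can use the
// vector-plus-immediate form, roughly
//   ld1w { z0.s }, p0/z, [z1.s, #16]
// with the address vector in the base-pointer slot and #16 as the index.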
3979 
3980 SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
3981  SelectionDAG &DAG) const {
3982  SDLoc DL(Op);
3983  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
3984  assert(MGT && "Can only custom lower gather load nodes");
3985 
3986  SDValue Index = MGT->getIndex();
3987  SDValue Chain = MGT->getChain();
3988  SDValue PassThru = MGT->getPassThru();
3989  SDValue Mask = MGT->getMask();
3990  SDValue BasePtr = MGT->getBasePtr();
3991  ISD::LoadExtType ExtTy = MGT->getExtensionType();
3992 
3993  ISD::MemIndexType IndexType = MGT->getIndexType();
3994  bool IsScaled =
3995  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
3996  bool IsSigned =
3997  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
3998  bool IdxNeedsExtend =
3999  getGatherScatterIndexIsExtended(Index) ||
4000  Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4001  bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4002 
4003  EVT VT = PassThru.getSimpleValueType();
4004  EVT MemVT = MGT->getMemoryVT();
4005  SDValue InputVT = DAG.getValueType(MemVT);
4006 
4007  if (VT.getVectorElementType() == MVT::bf16 &&
4008  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4009  return SDValue();
4010 
4011  // Handle FP data by using an integer gather and casting the result.
4012  if (VT.isFloatingPoint()) {
4013  EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4014  PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4015  InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4016  }
4017 
4018  SDVTList VTs = DAG.getVTList(PassThru.getSimpleValueType(), MVT::Other);
4019 
4020  if (getGatherScatterIndexIsExtended(Index))
4021  Index = Index.getOperand(0);
4022 
4023  unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4024  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4025  /*isGather=*/true, DAG);
4026 
4027  if (ResNeedsSignExtend)
4028  Opcode = getSignExtendedGatherOpcode(Opcode);
4029 
4030  SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT, PassThru};
4031  SDValue Gather = DAG.getNode(Opcode, DL, VTs, Ops);
4032 
4033  if (VT.isFloatingPoint()) {
4034  SDValue Cast = getSVESafeBitCast(VT, Gather, DAG);
4035  return DAG.getMergeValues({Cast, Gather}, DL);
4036  }
4037 
4038  return Gather;
4039 }
4040 
4041 SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4042  SelectionDAG &DAG) const {
4043  SDLoc DL(Op);
4044  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4045  assert(MSC && "Can only custom lower scatter store nodes");
4046 
4047  SDValue Index = MSC->getIndex();
4048  SDValue Chain = MSC->getChain();
4049  SDValue StoreVal = MSC->getValue();
4050  SDValue Mask = MSC->getMask();
4051  SDValue BasePtr = MSC->getBasePtr();
4052 
4053  ISD::MemIndexType IndexType = MSC->getIndexType();
4054  bool IsScaled =
4055  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4056  bool IsSigned =
4057  IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4058  bool NeedsExtend =
4059  getGatherScatterIndexIsExtended(Index) ||
4060  Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4061 
4062  EVT VT = StoreVal.getSimpleValueType();
4063  SDVTList VTs = DAG.getVTList(MVT::Other);
4064  EVT MemVT = MSC->getMemoryVT();
4065  SDValue InputVT = DAG.getValueType(MemVT);
4066 
4067  if (VT.getVectorElementType() == MVT::bf16 &&
4068  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4069  return SDValue();
4070 
4071  // Handle FP data by casting the data so an integer scatter can be used.
4072  if (VT.isFloatingPoint()) {
4073  EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4074  StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4075  InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4076  }
4077 
4078  if (getGatherScatterIndexIsExtended(Index))
4079  Index = Index.getOperand(0);
4080 
4081  unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4082  selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4083  /*isGather=*/false, DAG);
4084 
4085  SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4086  return DAG.getNode(Opcode, DL, VTs, Ops);
4087 }
4088 
4089 // Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4090 static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4091  EVT VT, EVT MemVT,
4092  SelectionDAG &DAG) {
4093  assert(VT.isVector() && "VT should be a vector type");
4094  assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4095 
4096  SDValue Value = ST->getValue();
4097 
4098  // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
4099  // extracts the word lane that represents the v4i8 subvector. This optimizes
4100  // the store to:
4101  //
4102  // xtn v0.8b, v0.8h
4103  // str s0, [x0]
4104 
4105  SDValue Undef = DAG.getUNDEF(MVT::i16);
4106  SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4107  {Undef, Undef, Undef, Undef});
4108 
4109  SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4110  Value, UndefVec);
4111  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4112 
4113  Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4114  SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4115  Trunc, DAG.getConstant(0, DL, MVT::i64));
4116 
4117  return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4118  ST->getBasePtr(), ST->getMemOperand());
4119 }
4120 
4121 // Custom lowering for stores, vector or scalar, truncating or not.
4122 // Currently we only custom-lower truncating stores from v4i16 to v4i8 and
4123 // volatile stores of i128.
4124 SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4125  SelectionDAG &DAG) const {
4126  SDLoc Dl(Op);
4127  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4128  assert (StoreNode && "Can only custom lower store nodes");
4129 
4130  SDValue Value = StoreNode->getValue();
4131 
4132  EVT VT = Value.getValueType();
4133  EVT MemVT = StoreNode->getMemoryVT();
4134 
4135  if (VT.isVector()) {
4136  if (useSVEForFixedLengthVectorVT(VT))
4137  return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4138 
4139  unsigned AS = StoreNode->getAddressSpace();
4140  Align Alignment = StoreNode->getAlign();
4141  if (Alignment < MemVT.getStoreSize() &&
4142  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment.value(),
4143  StoreNode->getMemOperand()->getFlags(),
4144  nullptr)) {
4145  return scalarizeVectorStore(StoreNode, DAG);
4146  }
4147 
4148  if (StoreNode->isTruncatingStore()) {
4149  return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4150  }
4151  // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4152  // the custom lowering, as there are no un-paired non-temporal stores and
4153  // legalization will break up 256 bit inputs.
4154  ElementCount EC = MemVT.getVectorElementCount();
4155  if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4156  EC.isKnownEven() &&
4157  ((MemVT.getScalarSizeInBits() == 8u ||
4158  MemVT.getScalarSizeInBits() == 16u ||
4159  MemVT.getScalarSizeInBits() == 32u ||
4160  MemVT.getScalarSizeInBits() == 64u))) {
4161  SDValue Lo =
4162  DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4163  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4164  StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4165  SDValue Hi =
4166  DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4167  MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4168  StoreNode->getValue(),
4169  DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4170  SDValue Result = DAG.getMemIntrinsicNode(
4171  AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4172  {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4173  StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4174  return Result;
4175  }
4176  } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4177  assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4178  SDValue Lo =
4179  DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4180  DAG.getConstant(0, Dl, MVT::i64));
4181  SDValue Hi =
4182  DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4183  DAG.getConstant(1, Dl, MVT::i64));
4184  SDValue Result = DAG.getMemIntrinsicNode(
4185  AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4186  {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4187  StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4188  return Result;
4189  }
4190 
4191  return SDValue();
4192 }
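
// Illustrative example, not part of the original source: a 256-bit
// non-temporal store such as
//   store <8 x i32> %v, <8 x i32>* %p, align 16, !nontemporal !0
// is split above into two 128-bit halves and emitted as a single
//   stnp q0, q1, [x0]
// rather than being broken apart by generic legalization.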
4193 
4194 // Generate SUBS and CSEL for integer abs.
4195 SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4196  MVT VT = Op.getSimpleValueType();
4197 
4198  if (VT.isVector())
4199  return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4200 
4201  SDLoc DL(Op);
4202  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4203  Op.getOperand(0));
4204  // Generate SUBS & CSEL.
4205  SDValue Cmp =
4206  DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4207  Op.getOperand(0), DAG.getConstant(0, DL, VT));
4208  return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4209  DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4210  Cmp.getValue(1));
4211 }
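
// Illustrative example, not part of the original source: a scalar
//   %r = call i32 @llvm.abs.i32(i32 %x, i1 false)
// lowered here as SUBS + CSEL typically ends up as
//   cmp  w0, #0
//   cneg w0, w0, mi
// (cneg being the csneg alias that negates the value when it is negative).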
4212 
4213 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4214  SelectionDAG &DAG) const {
4215  LLVM_DEBUG(dbgs() << "Custom lowering: ");
4216  LLVM_DEBUG(Op.dump());
4217 
4218  switch (Op.getOpcode()) {
4219  default:
4220  llvm_unreachable("unimplemented operand");
4221  return SDValue();
4222  case ISD::BITCAST:
4223  return LowerBITCAST(Op, DAG);
4224  case ISD::GlobalAddress:
4225  return LowerGlobalAddress(Op, DAG);
4226  case ISD::GlobalTLSAddress:
4227  return LowerGlobalTLSAddress(Op, DAG);
4228  case ISD::SETCC:
4229  case ISD::STRICT_FSETCC:
4230  case ISD::STRICT_FSETCCS:
4231  return LowerSETCC(Op, DAG);
4232  case ISD::BR_CC:
4233  return LowerBR_CC(Op, DAG);
4234  case ISD::SELECT:
4235  return LowerSELECT(Op, DAG);
4236  case ISD::SELECT_CC:
4237  return LowerSELECT_CC(Op, DAG);
4238  case ISD::JumpTable:
4239  return LowerJumpTable(Op, DAG);
4240  case ISD::BR_JT:
4241  return LowerBR_JT(Op, DAG);
4242  case ISD::ConstantPool:
4243  return LowerConstantPool(Op, DAG);
4244  case ISD::BlockAddress:
4245  return LowerBlockAddress(Op, DAG);
4246  case ISD::VASTART:
4247  return LowerVASTART(Op, DAG);
4248  case ISD::VACOPY:
4249  return LowerVACOPY(Op, DAG);
4250  case ISD::VAARG:
4251  return LowerVAARG(Op, DAG);
4252  case ISD::ADDC:
4253  case ISD::ADDE:
4254  case ISD::SUBC:
4255  case ISD::SUBE:
4256  return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4257  case ISD::SADDO:
4258  case ISD::UADDO:
4259  case ISD::SSUBO:
4260  case ISD::USUBO:
4261  case ISD::SMULO:
4262  case ISD::UMULO:
4263  return LowerXALUO(Op, DAG);
4264  case ISD::FADD:
4265  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4266  case ISD::FSUB:
4267  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4268  case ISD::FMUL:
4269  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4270  case ISD::FMA:
4271  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4272  case ISD::FDIV:
4273  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4274  case ISD::FNEG:
4275  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4276  case ISD::FCEIL:
4277  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4278  case ISD::FFLOOR:
4279  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4280  case ISD::FNEARBYINT:
4281  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4282  case ISD::FRINT:
4283  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4284  case ISD::FROUND:
4285  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4286  case ISD::FROUNDEVEN:
4287  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4288  case ISD::FTRUNC:
4289  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4290  case ISD::FSQRT:
4291  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4292  case ISD::FABS:
4293  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4294  case ISD::FP_ROUND:
4295  case ISD::STRICT_FP_ROUND:
4296  return LowerFP_ROUND(Op, DAG);
4297  case ISD::FP_EXTEND:
4298  return LowerFP_EXTEND(Op, DAG);
4299  case ISD::FRAMEADDR:
4300  return LowerFRAMEADDR(Op, DAG);
4301  case ISD::SPONENTRY:
4302  return LowerSPONENTRY(Op, DAG);
4303  case ISD::RETURNADDR:
4304  return LowerRETURNADDR(Op, DAG);
4305  case ISD::ADDROFRETURNADDR:
4306  return LowerADDROFRETURNADDR(Op, DAG);
4307  case ISD::CONCAT_VECTORS:
4308  return LowerCONCAT_VECTORS(Op, DAG);
4309  case ISD::INSERT_VECTOR_ELT:
4310  return LowerINSERT_VECTOR_ELT(Op, DAG);
4311  case ISD::EXTRACT_VECTOR_ELT:
4312  return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4313  case ISD::BUILD_VECTOR:
4314  return LowerBUILD_VECTOR(Op, DAG);
4315  case ISD::VECTOR_SHUFFLE:
4316  return LowerVECTOR_SHUFFLE(Op, DAG);
4317  case ISD::SPLAT_VECTOR:
4318  return LowerSPLAT_VECTOR(Op, DAG);
4319  case ISD::EXTRACT_SUBVECTOR:
4320  return LowerEXTRACT_SUBVECTOR(Op, DAG);
4321  case ISD::INSERT_SUBVECTOR:
4322  return LowerINSERT_SUBVECTOR(Op, DAG);
4323  case ISD::SDIV:
4324  case ISD::UDIV:
4325  return LowerDIV(Op, DAG);
4326  case ISD::SMIN:
4327  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
4328  /*OverrideNEON=*/true);
4329  case ISD::UMIN:
4330  return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
4331  /*OverrideNEON=*/true);
4332  case ISD::SMAX:
4333  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
4334  /*OverrideNEON=*/true);
4335  case ISD::UMAX:
4336  return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
4337  /*OverrideNEON=*/true);
4338  case ISD::SRA:
4339  case ISD::SRL:
4340  case ISD::SHL:
4341  return LowerVectorSRA_SRL_SHL(Op, DAG);
4342  case ISD::SHL_PARTS:
4343  return LowerShiftLeftParts(Op, DAG);
4344  case ISD::SRL_PARTS:
4345  case ISD::SRA_PARTS:
4346  return LowerShiftRightParts(Op, DAG);
4347  case ISD::CTPOP:
4348  return LowerCTPOP(Op, DAG);
4349  case ISD::FCOPYSIGN:
4350  return LowerFCOPYSIGN(Op, DAG);
4351  case ISD::OR:
4352  return LowerVectorOR(Op, DAG);
4353  case ISD::XOR:
4354  return LowerXOR(Op, DAG);
4355  case ISD::PREFETCH:
4356  return LowerPREFETCH(Op, DAG);
4357  case ISD::SINT_TO_FP:
4358  case ISD::UINT_TO_FP:
4359  case ISD::STRICT_SINT_TO_FP:
4360  case ISD::STRICT_UINT_TO_FP:
4361  return LowerINT_TO_FP(Op, DAG);
4362  case ISD::FP_TO_SINT:
4363  case ISD::FP_TO_UINT:
4364  case ISD::STRICT_FP_TO_SINT:
4365  case ISD::STRICT_FP_TO_UINT:
4366  return LowerFP_TO_INT(Op, DAG);
4367  case ISD::FSINCOS:
4368  return LowerFSINCOS(Op, DAG);
4369  case ISD::FLT_ROUNDS_:
4370  return LowerFLT_ROUNDS_(Op, DAG);
4371  case ISD::MUL:
4372  return LowerMUL(Op, DAG);
4373  case ISD::INTRINSIC_WO_CHAIN:
4374  return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4375  case ISD::STORE:
4376  return LowerSTORE(Op, DAG);
4377  case ISD::MGATHER:
4378  return LowerMGATHER(Op, DAG);
4379  case ISD::MSCATTER:
4380  return LowerMSCATTER(Op, DAG);
4381  case ISD::VECREDUCE_SEQ_FADD:
4382  return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4383  case ISD::VECREDUCE_ADD:
4384  case ISD::VECREDUCE_AND:
4385  case ISD::VECREDUCE_OR:
4386  case ISD::VECREDUCE_XOR:
4387  case ISD::VECREDUCE_SMAX:
4388  case ISD::VECREDUCE_SMIN:
4389  case ISD::VECREDUCE_UMAX:
4390  case ISD::VECREDUCE_UMIN:
4391  case ISD::VECREDUCE_FADD:
4392  case ISD::VECREDUCE_FMAX:
4393  case ISD::VECREDUCE_FMIN:
4394  return LowerVECREDUCE(Op, DAG);
4395  case ISD::ATOMIC_LOAD_SUB:
4396  return LowerATOMIC_LOAD_SUB(Op, DAG);
4397  case ISD::ATOMIC_LOAD_AND:
4398  return LowerATOMIC_LOAD_AND(Op, DAG);
4399  case ISD::DYNAMIC_STACKALLOC:
4400  return LowerDYNAMIC_STACKALLOC(Op, DAG);
4401  case ISD::VSCALE:
4402  return LowerVSCALE(Op, DAG);
4403  case ISD::ANY_EXTEND:
4404  case ISD::SIGN_EXTEND:
4405  case ISD::ZERO_EXTEND:
4406  return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4407  case ISD::SIGN_EXTEND_INREG: {
4408  // Only custom lower when ExtraVT has a legal byte based element type.
4409  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4410  EVT ExtraEltVT = ExtraVT.getVectorElementType();
4411  if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4412  (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4413  return SDValue();
4414 
4415  return LowerToPredicatedOp(Op, DAG,
4416  AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4417  }
4418  case ISD::TRUNCATE:
4419  return LowerTRUNCATE(Op, DAG);
4420  case ISD::LOAD:
4421  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
4422  return LowerFixedLengthVectorLoadToSVE(Op, DAG);
4423  llvm_unreachable("Unexpected request to lower ISD::LOAD");
4424  case ISD::ADD:
4425  return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
4426  case ISD::AND:
4427  return LowerToScalableOp(Op, DAG);
4428  case ISD::SUB:
4429  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
4430  case ISD::FMAXNUM:
4431  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
4432  case ISD::FMINNUM:
4433  return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
4434  case ISD::VSELECT:
4435  return LowerFixedLengthVectorSelectToSVE(Op, DAG);
4436  case ISD::ABS:
4437  return LowerABS(Op, DAG);
4438  case ISD::BITREVERSE:
4439  return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
4440  /*OverrideNEON=*/true);
4441  case ISD::BSWAP:
4442  return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
4443  case ISD::CTLZ:
4444  return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
4445  /*OverrideNEON=*/true);
4446  case ISD::CTTZ:
4447  return LowerCTTZ(Op, DAG);
4448  }
4449 }
4450 
4451 bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
4452  return !Subtarget->useSVEForFixedLengthVectors();
4453 }
4454 
4455 bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
4456  EVT VT, bool OverrideNEON) const {
4457  if (!Subtarget->useSVEForFixedLengthVectors())
4458  return false;
4459 
4460  if (!VT.isFixedLengthVector())
4461  return false;
4462 
4463  // Don't use SVE for vectors we cannot scalarize if required.
4464  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
4465  // Fixed length predicates should be promoted to i8.
4466  // NOTE: This is consistent with how NEON (and thus 64/128-bit vectors) work.
4467  case MVT::i1:
4468  default:
4469  return false;
4470  case MVT::i8:
4471  case MVT::i16:
4472  case MVT::i32:
4473  case MVT::i64:
4474  case MVT::f16:
4475  case MVT::f32:
4476  case MVT::f64:
4477  break;
4478  }
4479 
4480  // All SVE implementations support NEON sized vectors.
4481  if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
4482  return true;
4483 
4484  // Ensure NEON MVTs only belong to a single register class.
4485  if (VT.getFixedSizeInBits() <= 128)
4486  return false;
4487 
4488  // Don't use SVE for types that don't fit.
4489  if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
4490  return false;
4491 
4492  // TODO: Perhaps an artificial restriction, but worth having whilst getting
4493  // the base fixed length SVE support in place.
4494  if (!VT.isPow2VectorType())
4495  return false;
4496 
4497  return true;
4498 }
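// Illustrative sketch of the checks above (assumption: an SVE subtarget with
// a minimum vector width of 512 bits): v32i16 and v8f64 (512-bit, power of
// two, legal element types) return true; v16i8 returns false because it fits
// in a NEON register; v48i8 (384 bits) returns false because it is not a
// power-of-two vector type; v128i8 returns false because it exceeds the
// minimum SVE register size.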
4499 
4500 //===----------------------------------------------------------------------===//
4501 // Calling Convention Implementation
4502 //===----------------------------------------------------------------------===//
4503 
4504 /// Selects the correct CCAssignFn for a given CallingConvention value.
4505 CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
4506  bool IsVarArg) const {
4507  switch (CC) {
4508  default:
4509  report_fatal_error("Unsupported calling convention.");
4510  case CallingConv::WebKit_JS:
4511  return CC_AArch64_WebKit_JS;
4512  case CallingConv::GHC:
4513  return CC_AArch64_GHC;
4514  case CallingConv::C:
4515  case CallingConv::Fast:
4516  case CallingConv::PreserveMost:
4517  case CallingConv::CXX_FAST_TLS:
4518  case CallingConv::Swift:
4519  if (Subtarget->isTargetWindows() && IsVarArg)
4520  return CC_AArch64_Win64_VarArg;
4521  if (!Subtarget->isTargetDarwin())
4522  return CC_AArch64_AAPCS;
4523  if (!IsVarArg)
4524  return CC_AArch64_DarwinPCS;
4525  return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
4526  : CC_AArch64_DarwinPCS_VarArg;
4527  case CallingConv::Win64:
4528  return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
4529  case CallingConv::CFGuard_Check:
4530  return CC_AArch64_Win64_CFGuard_Check;
4531  case CallingConv::AArch64_VectorCall:
4532  case CallingConv::AArch64_SVE_VectorCall:
4533  return CC_AArch64_AAPCS;
4534  }
4535 }
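// Illustrative examples of the mapping above (not part of the source):
//   C on Linux/ELF                        -> CC_AArch64_AAPCS
//   C, variadic, on Windows targets       -> CC_AArch64_Win64_VarArg
//   C, variadic, on Darwin (LP64)         -> CC_AArch64_DarwinPCS_VarArg
//   CallingConv::WebKit_JS                -> CC_AArch64_WebKit_JS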
4536 
4537 CCAssignFn *
4538 AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
4539  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
4540  : RetCC_AArch64_AAPCS;
4541 }
4542 
4543 SDValue AArch64TargetLowering::LowerFormalArguments(
4544  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4545  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4546  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4547  MachineFunction &MF = DAG.getMachineFunction();
4548  MachineFrameInfo &MFI = MF.getFrameInfo();
4549  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
4550 
4551  // Assign locations to all of the incoming arguments.
4552  SmallVector<CCValAssign, 16> ArgLocs;
4553  DenseMap<unsigned, SDValue> CopiedRegs;
4554  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4555  *DAG.getContext());
4556 
4557  // At this point, Ins[].VT may already be promoted to i32. To correctly
4558  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
4559  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
4560  // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
4561  // we use a special version of AnalyzeFormalArguments to pass in ValVT and
4562  // LocVT.
4563  unsigned NumArgs = Ins.size();
4564  Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4565  unsigned CurArgIdx = 0;
4566  for (unsigned i = 0; i != NumArgs; ++i) {
4567  MVT ValVT = Ins[i].VT;
4568  if (Ins[i].isOrigArg()) {
4569  std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
4570  CurArgIdx = Ins[i].getOrigArgIndex();
4571 
4572  // Get type of the original argument.
4573  EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
4574  /*AllowUnknown*/ true);
4575  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
4576  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
4577  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
4578  ValVT = MVT::i8;
4579  else if (ActualMVT == MVT::i16)
4580  ValVT = MVT::i16;
4581  }
4582  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
4583  bool Res =
4584  AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
4585  assert(!Res && "Call operand has unhandled type");
4586  (void)Res;
4587  }
4588  SmallVector<SDValue, 16> ArgValues;
4589  unsigned ExtraArgLocs = 0;
4590  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4591  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
4592 
4593  if (Ins[i].Flags.isByVal()) {
4594  // Byval is used for HFAs in the PCS, but the system should work in a
4595  // non-compliant manner for larger structs.
4596  EVT PtrVT = getPointerTy(DAG.getDataLayout());
4597  int Size = Ins[i].Flags.getByValSize();
4598  unsigned NumRegs = (Size + 7) / 8;
4599 
4600  // FIXME: This works on big-endian for composite byvals, which are the common
4601  // case. It should also work for fundamental types.
4602  unsigned FrameIdx =
4603  MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
4604  SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
4605  InVals.push_back(FrameIdxN);
4606 
4607  continue;
4608  }
4609 
4610  SDValue ArgValue;
4611  if (VA.isRegLoc()) {
4612  // Arguments stored in registers.
4613  EVT RegVT = VA.getLocVT();
4614  const TargetRegisterClass *RC;
4615 
4616  if (RegVT == MVT::i32)
4617  RC = &AArch64::GPR32RegClass;
4618  else if (RegVT == MVT::i64)
4619  RC = &AArch64::GPR64RegClass;
4620  else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4621  RC = &AArch64::FPR16RegClass;
4622  else if (RegVT == MVT::f32)
4623  RC = &AArch64::FPR32RegClass;
4624  else if (RegVT == MVT::f64 || RegVT.is64BitVector())
4625  RC = &AArch64::FPR64RegClass;
4626  else if (RegVT == MVT::f128 || RegVT.is128BitVector())
4627  RC = &AArch64::FPR128RegClass;
4628  else if (RegVT.isScalableVector() &&
4629  RegVT.getVectorElementType() == MVT::i1)
4630  RC = &AArch64::PPRRegClass;
4631  else if (RegVT.isScalableVector())
4632  RC = &AArch64::ZPRRegClass;
4633  else
4634  llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4635 
4636  // Transform the arguments in physical registers into virtual ones.
4637  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
4638  ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
4639 
4640  // If this is an 8, 16 or 32-bit value, it is really passed promoted
4641  // to 64 bits. Insert an assert[sz]ext to capture this, then
4642  // truncate to the right size.
4643  switch (VA.getLocInfo()) {
4644  default:
4645  llvm_unreachable("Unknown loc info!");
4646  case CCValAssign::Full:
4647  break;
4648  case CCValAssign::Indirect:
4649  assert(VA.getValVT().isScalableVector() &&
4650  "Only scalable vectors can be passed indirectly");
4651  break;
4652  case CCValAssign::BCvt:
4653  ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
4654  break;
4655  case CCValAssign::AExt:
4656  case CCValAssign::SExt:
4657  case CCValAssign::ZExt:
4658  break;
4659  case CCValAssign::AExtUpper:
4660  ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
4661  DAG.getConstant(32, DL, RegVT));
4662  ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
4663  break;
4664  }
4665  } else { // VA.isRegLoc()
4666  assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
4667  unsigned ArgOffset = VA.getLocMemOffset();
4668  unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
4669  ? VA.getLocVT().getSizeInBits()
4670  : VA.getValVT().getSizeInBits()) / 8;
4671 
4672  uint32_t BEAlign = 0;
4673  if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
4674  !Ins[i].Flags.isInConsecutiveRegs())
4675  BEAlign = 8 - ArgSize;
4676 
4677  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
4678 
4679  // Create load nodes to retrieve arguments from the stack.
4680  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4681 
4682  // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
4683  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
4684  MVT MemVT = VA.getValVT();
4685 
4686  switch (VA.getLocInfo()) {
4687  default:
4688  break;
4689  case CCValAssign::Trunc:
4690  case CCValAssign::BCvt:
4691  MemVT = VA.getLocVT();
4692  break;
4693  case CCValAssign::Indirect:
4694  assert(VA.getValVT().isScalableVector() &&
4695  "Only scalable vectors can be passed indirectly");
4696  MemVT = VA.getLocVT();
4697  break;
4698  case CCValAssign::SExt:
4699  ExtType = ISD::SEXTLOAD;
4700  break;
4701  case CCValAssign::ZExt:
4702  ExtType = ISD::ZEXTLOAD;
4703  break;
4704  case CCValAssign::AExt:
4705  ExtType = ISD::EXTLOAD;
4706  break;
4707  }
4708 
4709  ArgValue = DAG.getExtLoad(
4710  ExtType, DL, VA.getLocVT(), Chain, FIN,
4711  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
4712  MemVT);
4713 
4714  }
4715 
4716  if (VA.getLocInfo() == CCValAssign::Indirect) {
4717  assert(VA.getValVT().isScalableVector() &&
4718  "Only scalable vectors can be passed indirectly");
4719 
4720  uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
4721  unsigned NumParts = 1;
4722  if (Ins[i].Flags.isInConsecutiveRegs()) {
4723  assert(!Ins[i].Flags.isInConsecutiveRegsLast());
4724  while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
4725  ++NumParts;
4726  }
4727 
4728  MVT PartLoad = VA.getValVT();
4729  SDValue Ptr = ArgValue;
4730 
4731  // Ensure we generate all loads for each tuple part, whilst updating the
4732  // pointer after each load correctly using vscale.
4733  while (NumParts > 0) {
4734  ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
4735  InVals.push_back(ArgValue);
4736  NumParts--;
4737  if (NumParts > 0) {
4738  SDValue BytesIncrement = DAG.getVScale(
4739  DL, Ptr.getValueType(),
4740  APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
4741  SDNodeFlags Flags;
4742  Flags.setNoUnsignedWrap(true);
4743  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
4744  BytesIncrement, Flags);
4745  ExtraArgLocs++;
4746  i++;
4747  }
4748  }
4749  } else {
4750  if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
4751  ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
4752  ArgValue, DAG.getValueType(MVT::i32));
4753  InVals.push_back(ArgValue);
4754  }
4755  }
4756  assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
4757 
4758  // varargs
4759  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4760  if (isVarArg) {
4761  if (!Subtarget->isTargetDarwin() || IsWin64) {
4762  // The AAPCS variadic function ABI is identical to the non-variadic
4763  // one. As a result there may be more arguments in registers and we should
4764  // save them for future reference.
4765  // Win64 variadic functions also pass arguments in registers, but all float
4766  // arguments are passed in integer registers.
4767  saveVarArgRegisters(CCInfo, DAG, DL, Chain);
4768  }
4769 
4770  // This will point to the next argument passed via stack.
4771  unsigned StackOffset = CCInfo.getNextStackOffset();
4772  // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
4773  StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
4774  FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
4775 
4776  if (MFI.hasMustTailInVarArgFunc()) {
4777  SmallVector<MVT, 2> RegParmTypes;
4778  RegParmTypes.push_back(MVT::i64);
4779  RegParmTypes.push_back(MVT::f128);
4780  // Compute the set of forwarded registers. The rest are scratch.
4781  SmallVectorImpl<ForwardedRegister> &Forwards =
4782  FuncInfo->getForwardedMustTailRegParms();
4783  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
4784  CC_AArch64_AAPCS);
4785 
4786  // Conservatively forward X8, since it might be used for aggregate return.
4787  if (!CCInfo.isAllocated(AArch64::X8)) {
4788  unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
4789  Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
4790  }
4791  }
4792  }
4793 
4794  // On Windows, InReg pointers must be returned, so record the pointer in a
4795  // virtual register at the start of the function so it can be returned in the
4796  // epilogue.
4797  if (IsWin64) {
4798  for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
4799  if (Ins[I].Flags.isInReg()) {
4800  assert(!FuncInfo->getSRetReturnReg());
4801 
4802  MVT PtrTy = getPointerTy(DAG.getDataLayout());
4803  Register Reg =
4804  MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
4805  FuncInfo->setSRetReturnReg(Reg);
4806 
4807  SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
4808  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
4809  break;
4810  }
4811  }
4812  }
4813 
4814  unsigned StackArgSize = CCInfo.getNextStackOffset();
4815  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4816  if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
4817  // This is a non-standard ABI so by fiat I say we're allowed to make full
4818  // use of the stack area to be popped, which must be aligned to 16 bytes in
4819  // any case:
4820  StackArgSize = alignTo(StackArgSize, 16);
4821 
4822  // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
4823  // a multiple of 16.
4824  FuncInfo->setArgumentStackToRestore(StackArgSize);
4825 
4826  // This realignment carries over to the available bytes below. Our own
4827  // callers will guarantee the space is free by giving an aligned value to
4828  // CALLSEQ_START.
4829  }
4830  // Even if we're not expected to free up the space, it's useful to know how
4831  // much is there while considering tail calls (because we can reuse it).
4832  FuncInfo->setBytesInStackArgArea(StackArgSize);
4833 
4834  if (Subtarget->hasCustomCallingConv())
4835  Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
4836 
4837  return Chain;
4838 }
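// Illustrative example (hypothetical signature, not from the file): for
//   define void @f(i8 %a, <vscale x 4 x i32> %v)
// the i8 argument keeps ValVT == i8 above (so a stack copy would use a 1-byte
// slot rather than a promoted i32), while the scalable-vector argument is
// passed Indirect: the incoming register holds a pointer and the loop above
// emits the actual loads, advancing the pointer by vscale-scaled increments.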
4839 
4840 void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
4841  SelectionDAG &DAG,
4842  const SDLoc &DL,
4843  SDValue &Chain) const {
4844  MachineFunction &MF = DAG.getMachineFunction();
4845  MachineFrameInfo &MFI = MF.getFrameInfo();
4846  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4847  auto PtrVT = getPointerTy(DAG.getDataLayout());
4848  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
4849 
4850  SmallVector<SDValue, 8> MemOps;
4851 
4852  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
4853  AArch64::X3, AArch64::X4, AArch64::X5,
4854  AArch64::X6, AArch64::X7 };
4855  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
4856  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
4857 
4858  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
4859  int GPRIdx = 0;
4860  if (GPRSaveSize != 0) {
4861  if (IsWin64) {
4862  GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
4863  if (GPRSaveSize & 15)
4864  // The extra size here, if triggered, will always be 8.
4865  MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
4866  } else
4867  GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
4868 
4869  SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
4870 
4871  for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
4872  unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
4873  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
4874  SDValue Store = DAG.getStore(
4875  Val.getValue(1), DL, Val, FIN,
4876  IsWin64
4877  ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
4878  GPRIdx,
4879  (i - FirstVariadicGPR) * 8)
4880  : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
4881  MemOps.push_back(Store);
4882  FIN =
4883  DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
4884  }
4885  }
4886  FuncInfo->setVarArgsGPRIndex(GPRIdx);
4887  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
4888 
4889  if (Subtarget->hasFPARMv8() && !IsWin64) {
4890  static const MCPhysReg FPRArgRegs[] = {
4891  AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
4892  AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
4893  static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
4894  unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
4895 
4896  unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
4897  int FPRIdx = 0;
4898  if (FPRSaveSize != 0) {
4899  FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
4900 
4901  SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
4902 
4903  for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
4904  unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
4905  SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
4906 
4907  SDValue Store = DAG.getStore(
4908  Val.getValue(1), DL, Val, FIN,
4909  MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
4910  MemOps.push_back(Store);
4911  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
4912  DAG.getConstant(16, DL, PtrVT));
4913  }
4914  }
4915  FuncInfo->setVarArgsFPRIndex(FPRIdx);
4916  FuncInfo->setVarArgsFPRSize(FPRSaveSize);
4917  }
4918 
4919  if (!MemOps.empty()) {
4920  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4921  }
4922 }
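// Illustrative sketch (hypothetical variadic function, AAPCS64 Linux target):
//   void f(int n, ...) { va_list ap; va_start(ap, n); ... }
// has one fixed GPR argument, so X1..X7 (56 bytes) are spilled into the GPR
// save area and Q0..Q7 (128 bytes) into the FPR save area created above;
// va_arg then reads any register-passed variadic arguments back from memory.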
4923 
4924 /// LowerCallResult - Lower the result values of a call into the
4925 /// appropriate copies out of appropriate physical registers.
4926 SDValue AArch64TargetLowering::LowerCallResult(
4927  SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4928  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
4929  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
4930  SDValue ThisVal) const {
4931  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
4932  // Assign locations to each value returned by this call.
4933  SmallVector<CCValAssign, 16> RVLocs;
4934  DenseMap<unsigned, SDValue> CopiedRegs;
4935  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4936  *DAG.getContext());
4937  CCInfo.AnalyzeCallResult(Ins, RetCC);
4938 
4939  // Copy all of the result registers out of their specified physreg.
4940  for (unsigned i = 0; i != RVLocs.size(); ++i) {
4941  CCValAssign VA = RVLocs[i];
4942 
4943  // Pass 'this' value directly from the argument to return value, to avoid
4944  // reg unit interference
4945  if (i == 0 && isThisReturn) {
4946  assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
4947  "unexpected return calling convention register assignment");
4948  InVals.push_back(ThisVal);
4949  continue;
4950  }
4951 
4952  // Avoid copying a physreg twice since RegAllocFast is incompetent and only
4953  // allows one use of a physreg per block.
4954  SDValue Val = CopiedRegs.lookup(VA.getLocReg());
4955  if (!Val) {
4956  Val =
4957  DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
4958  Chain = Val.getValue(1);
4959  InFlag = Val.getValue(2);
4960  CopiedRegs[VA.getLocReg()] = Val;
4961  }
4962 
4963  switch (VA.getLocInfo()) {
4964  default:
4965  llvm_unreachable("Unknown loc info!");
4966  case CCValAssign::Full:
4967  break;
4968  case CCValAssign::BCvt:
4969  Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4970  break;
4971  case CCValAssign::AExtUpper:
4972  Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
4973  DAG.getConstant(32, DL, VA.getLocVT()));
4974  LLVM_FALLTHROUGH;
4975  case CCValAssign::AExt:
4976  LLVM_FALLTHROUGH;
4977  case CCValAssign::ZExt:
4978  Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
4979  break;
4980  }
4981 
4982  InVals.push_back(Val);
4983  }
4984 
4985  return Chain;
4986 }
4987 
4988 /// Return true if the calling convention is one that we can guarantee TCO for.
4989 static bool canGuaranteeTCO(CallingConv::ID CC) {
4990  return CC == CallingConv::Fast;
4991 }
4992 
4993 /// Return true if we might ever do TCO for calls with this calling convention.
4994 static bool mayTailCallThisCC(CallingConv::ID CC) {
4995  switch (CC) {
4996  case CallingConv::C:
4997  case CallingConv::AArch64_SVE_VectorCall:
4998  case CallingConv::PreserveMost:
4999  case CallingConv::Swift:
5000  return true;
5001  default:
5002  return canGuaranteeTCO(CC);
5003  }
5004 }
5005 
5006 bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5007  SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5008  const SmallVectorImpl<ISD::OutputArg> &Outs,
5009  const SmallVectorImpl<SDValue> &OutVals,
5010  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5011  if (!mayTailCallThisCC(CalleeCC))
5012  return false;
5013 
5014  MachineFunction &MF = DAG.getMachineFunction();
5015  const Function &CallerF = MF.getFunction();
5016  CallingConv::ID CallerCC = CallerF.getCallingConv();
5017 
5018  // If this function uses the C calling convention but has an SVE signature,
5019  // then it preserves more registers and should assume the SVE_VectorCall CC.
5020  // The check for matching callee-saved regs will determine whether it is
5021  // eligible for TCO.
5022  if (CallerCC == CallingConv::C &&
5023  AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5024  CallerCC = CallingConv::AArch64_SVE_VectorCall;
5025 
5026  bool CCMatch = CallerCC == CalleeCC;
5027 
5028  // When using the Windows calling convention on a non-windows OS, we want
5029  // to back up and restore X18 in such functions; we can't do a tail call
5030  // from those functions.
5031  if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5032  CalleeCC != CallingConv::Win64)
5033  return false;
5034 
5035  // Byval parameters hand the function a pointer directly into the stack area
5036  // we want to reuse during a tail call. Working around this *is* possible (see
5037  // X86) but less efficient and uglier in LowerCall.
5038  for (Function::const_arg_iterator i = CallerF.arg_begin(),
5039  e = CallerF.arg_end();
5040  i != e; ++i) {
5041  if (i->hasByValAttr())
5042  return false;
5043 
5044  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5045  // In this case, it is necessary to save/restore X0 in the callee. Tail
5046  // call opt interferes with this. So we disable tail call opt when the
5047  // caller has an argument with "inreg" attribute.
5048 
5049  // FIXME: Check whether the callee also has an "inreg" argument.
5050  if (i->hasInRegAttr())
5051  return false;
5052  }
5053 
5054  if (getTargetMachine().Options.GuaranteedTailCallOpt)
5055  return canGuaranteeTCO(CalleeCC) && CCMatch;
5056 
5057  // Externally-defined functions with weak linkage should not be
5058  // tail-called on AArch64 when the OS does not support dynamic
5059  // pre-emption of symbols, as the AAELF spec requires normal calls
5060  // to undefined weak functions to be replaced with a NOP or jump to the
5061  // next instruction. The behaviour of branch instructions in this
5062  // situation (as used for tail calls) is implementation-defined, so we
5063  // cannot rely on the linker replacing the tail call with a return.
5064  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5065  const GlobalValue *GV = G->getGlobal();
5066  const Triple &TT = getTargetMachine().getTargetTriple();
5067  if (GV->hasExternalWeakLinkage() &&
5068  (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5069  return false;
5070  }
5071 
5072  // Now we search for cases where we can use a tail call without changing the
5073  // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5074  // concept.
5075 
5076  // I want anyone implementing a new calling convention to think long and hard
5077  // about this assert.
5078  assert((!isVarArg || CalleeCC == CallingConv::C) &&
5079  "Unexpected variadic calling convention");
5080 
5081  LLVMContext &C = *DAG.getContext();
5082  if (isVarArg && !Outs.empty()) {
5083  // At least two cases here: if caller is fastcc then we can't have any
5084  // memory arguments (we'd be expected to clean up the stack afterwards). If
5085  // caller is C then we could potentially use its argument area.
5086 
5087  // FIXME: for now we take the most conservative of these in both cases:
5088  // disallow all variadic memory operands.
5089  SmallVector<CCValAssign, 16> ArgLocs;
5090  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5091 
5092  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5093  for (const CCValAssign &ArgLoc : ArgLocs)
5094  if (!ArgLoc.isRegLoc())
5095  return false;
5096  }
5097 
5098  // Check that the call results are passed in the same way.
5099  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5100  CCAssignFnForCall(CalleeCC, isVarArg),
5101  CCAssignFnForCall(CallerCC, isVarArg)))
5102  return false;
5103  // The callee has to preserve all registers the caller needs to preserve.
5104  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5105  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5106  if (!CCMatch) {
5107  const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5108  if (Subtarget->hasCustomCallingConv()) {
5109  TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5110  TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5111  }
5112  if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5113  return false;
5114  }
5115 
5116  // Nothing more to check if the callee is taking no arguments
5117  if (Outs.empty())
5118  return true;
5119 
5120  SmallVector<CCValAssign, 16> ArgLocs;
5121  CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5122 
5123  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5124 
5125  const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5126 
5127  // If any of the arguments is passed indirectly, it must be SVE, so the
5128  // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5129  // allocate space on the stack. That is why we determine explicitly here
5130  // that such a call cannot be a tailcall.
5131  if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5132  assert((A.getLocInfo() != CCValAssign::Indirect ||
5133  A.getValVT().isScalableVector()) &&
5134  "Expected value to be scalable");
5135  return A.getLocInfo() == CCValAssign::Indirect;
5136  }))
5137  return false;
5138 
5139  // If the stack arguments for this call do not fit into our own save area then
5140  // the call cannot be made tail.
5141  if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5142  return false;
5143 
5144  const MachineRegisterInfo &MRI = MF.getRegInfo();
5145  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5146  return false;
5147 
5148  return true;
5149 }
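// Illustrative example (hypothetical IR, not from the file): a call such as
//   %r = tail call i64 @callee(i64 %a, i64 %b)
// from a caller that also uses the C convention, with no byval, inreg or
// stack-passed arguments, passes the checks above and can be emitted as a
// branch (TC_RETURN) instead of BL followed by RET.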
5150 
5151 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5152  SelectionDAG &DAG,
5153  MachineFrameInfo &MFI,
5154  int ClobberedFI) const {
5155  SmallVector<SDValue, 8> ArgChains;
5156  int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5157  int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5158 
5159  // Include the original chain at the beginning of the list. When this is
5160  // used by target LowerCall hooks, this helps legalize find the
5161  // CALLSEQ_BEGIN node.
5162  ArgChains.push_back(Chain);
5163 
5164  // Add a chain value for each stack argument corresponding
5165  for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5166  UE = DAG.getEntryNode().getNode()->use_end();
5167  U != UE; ++U)
5168  if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5169  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5170  if (FI->getIndex() < 0) {
5171  int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5172  int64_t InLastByte = InFirstByte;
5173  InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5174 
5175  if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5176  (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5177  ArgChains.push_back(SDValue(L, 1));
5178  }
5179 
5180  // Build a tokenfactor for all the chains.
5181  return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5182 }
5183 
5184 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5185  bool TailCallOpt) const {
5186  return CallCC == CallingConv::Fast && TailCallOpt;
5187 }
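// Illustrative consequence of the rule above: only fastcc calls compiled with
// GuaranteedTailCallOpt (-tailcallopt) have the callee pop its own stack
// arguments; for the default C convention this returns false and the caller
// is responsible for cleaning up its outgoing argument area.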
5188 
5189 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5190 /// and add input and output parameter nodes.
5191 SDValue
5192 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5193  SmallVectorImpl<SDValue> &InVals) const {
5194  SelectionDAG &DAG = CLI.DAG;
5195  SDLoc &DL = CLI.DL;
5196  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5197  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5198  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5199  SDValue Chain = CLI.Chain;
5200  SDValue Callee = CLI.Callee;
5201  bool &IsTailCall = CLI.IsTailCall;
5202  CallingConv::ID CallConv = CLI.CallConv;
5203  bool IsVarArg = CLI.IsVarArg;
5204 
5205  MachineFunction &MF = DAG.getMachineFunction();
5206  MachineFunction::CallSiteInfo CSInfo;
5207  bool IsThisReturn = false;
5208 
5209  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5210  bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5211  bool IsSibCall = false;
5212 
5213  // Check callee args/returns for SVE registers and set calling convention
5214  // accordingly.
5215  if (CallConv == CallingConv::C) {
5216  bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5217  return Out.VT.isScalableVector();
5218  });
5219  bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5220  return In.VT.isScalableVector();
5221  });
5222 
5223  if (CalleeInSVE || CalleeOutSVE)
5224  CallConv = CallingConv::AArch64_SVE_VectorCall;
5225  }
5226 
5227  if (IsTailCall) {
5228  // Check if it's really possible to do a tail call.
5229  IsTailCall = isEligibleForTailCallOptimization(
5230  Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5231  if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5232  report_fatal_error("failed to perform tail call elimination on a call "
5233  "site marked musttail");
5234 
5235  // A sibling call is one where we're under the usual C ABI and not planning
5236  // to change that but can still do a tail call:
5237  if (!TailCallOpt && IsTailCall)
5238  IsSibCall = true;
5239 
5240  if (IsTailCall)
5241  ++NumTailCalls;
5242  }
5243 
5244  // Analyze operands of the call, assigning locations to each operand.
5245  SmallVector<CCValAssign, 16> ArgLocs;
5246  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5247  *DAG.getContext());
5248 
5249  if (IsVarArg) {
5250  // Handle fixed and variable vector arguments differently.
5251  // Variable vector arguments always go into memory.
5252  unsigned NumArgs = Outs.size();
5253 
5254  for (unsigned i = 0; i != NumArgs; ++i) {
5255  MVT ArgVT = Outs[i].VT;
5256  if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5257  report_fatal_error("Passing SVE types to variadic functions is "
5258  "currently not supported");
5259 
5260  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5261  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
5262  /*IsVarArg=*/ !Outs[i].IsFixed);
5263  bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5264  assert(!Res && "Call operand has unhandled type");
5265  (void)Res;
5266  }
5267  } else {
5268  // At this point, Outs[].VT may already be promoted to i32. To correctly
5269  // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5270  // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5271  // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5272  // we use a special version of AnalyzeCallOperands to pass in ValVT and
5273  // LocVT.
5274  unsigned NumArgs = Outs.size();
5275  for (unsigned i = 0; i != NumArgs; ++i) {
5276  MVT ValVT = Outs[i].VT;
5277  // Get type of the original argument.
5278  EVT ActualVT = getValueType(DAG.getDataLayout(),
5279  CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5280  /*AllowUnknown*/ true);
5281  MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5282  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5283  // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5284  if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5285  ValVT = MVT::i8;
5286  else if (ActualMVT == MVT::i16)
5287  ValVT = MVT::i16;
5288 
5289  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5290  bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5291  assert(!Res && "Call operand has unhandled type");
5292  (void)Res;
5293  }
5294  }
5295 
5296  // Get a count of how many bytes are to be pushed on the stack.
5297  unsigned NumBytes = CCInfo.getNextStackOffset();
5298 
5299  if (IsSibCall) {
5300  // Since we're not changing the ABI to make this a tail call, the memory
5301  // operands are already available in the caller's incoming argument space.
5302  NumBytes = 0;
5303  }
5304 
5305  // FPDiff is the byte offset of the call's argument area from the callee's.
5306  // Stores to callee stack arguments will be placed in FixedStackSlots offset
5307  // by this amount for a tail call. In a sibling call it must be 0 because the
5308  // caller will deallocate the entire stack and the callee still expects its
5309  // arguments to begin at SP+0. Completely unused for non-tail calls.
5310  int FPDiff = 0;
5311 
5312  if (IsTailCall && !IsSibCall) {
5313  unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5314 
5315  // Since callee will pop argument stack as a tail call, we must keep the
5316  // popped size 16-byte aligned.
5317  NumBytes = alignTo(NumBytes, 16);
5318 
5319  // FPDiff will be negative if this tail call requires more space than we
5320  // would automatically have in our incoming argument space. Positive if we
5321  // can actually shrink the stack.
5322  FPDiff = NumReusableBytes - NumBytes;
5323 
5324  // The stack pointer must be 16-byte aligned at all times it's used for a
5325  // memory operation, which in practice means at *all* times and in
5326  // particular across call boundaries. Therefore our own arguments started at
5327  // a 16-byte aligned SP and the delta applied for the tail call should
5328  // satisfy the same constraint.
5329  assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5330  }
5331 
5332  // Adjust the stack pointer for the new arguments...
5333  // These operations are automatically eliminated by the prolog/epilog pass
5334  if (!IsSibCall)
5335  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
5336 
5337  SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5338  getPointerTy(DAG.getDataLayout()));
5339 
5340  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5341  SmallSet<unsigned, 8> RegsUsed;
5342  SmallVector<SDValue, 8> MemOpChains;
5343  auto PtrVT = getPointerTy(DAG.getDataLayout());
5344 
5345  if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5346  const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5347  for (const auto &F : Forwards) {
5348  SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5349  RegsToPass.emplace_back(F.PReg, Val);
5350  }
5351  }
5352 
5353  // Walk the register/memloc assignments, inserting copies/loads.
5354  unsigned ExtraArgLocs = 0;
5355  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5356  CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5357  SDValue Arg = OutVals[i];
5358  ISD::ArgFlagsTy Flags = Outs[i].Flags;
5359 
5360  // Promote the value if needed.
5361  switch (VA.getLocInfo()) {
5362  default:
5363  llvm_unreachable("Unknown loc info!");
5364  case CCValAssign::Full:
5365  break;
5366  case CCValAssign::SExt:
5367  Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5368  break;
5369  case CCValAssign::ZExt:
5370  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5371  break;
5372  case CCValAssign::AExt:
5373  if (Outs[i].ArgVT == MVT::i1) {
5374  // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5375  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5376  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5377  }
5378  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5379  break;
5380  case CCValAssign::AExtUpper:
5381  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5382  Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5383  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5384  DAG.getConstant(32, DL, VA.getLocVT()));
5385  break;
5386  case CCValAssign::BCvt:
5387  Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5388  break;
5389  case CCValAssign::Trunc:
5390  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5391  break;
5392  case CCValAssign::FPExt:
5393  Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
5394  break;
5395  case CCValAssign::Indirect:
5396  assert(VA.getValVT().isScalableVector() &&
5397  "Only scalable vectors can be passed indirectly");
5398 
5399  uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
5400  uint64_t PartSize = StoreSize;
5401  unsigned NumParts = 1;
5402  if (Outs[i].Flags.isInConsecutiveRegs()) {
5403  assert(!Outs[i].Flags.isInConsecutiveRegsLast());
5404  while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5405  ++NumParts;
5406  StoreSize *= NumParts;
5407  }
5408 
5409  MachineFrameInfo &MFI = MF.getFrameInfo();
5410  Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
5411  Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
5412  int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
5413  MFI.setStackID(FI, TargetStackID::SVEVector);
5414 
5415  MachinePointerInfo MPI =
5416  MachinePointerInfo::getFixedStack(MF, FI);
5417  SDValue Ptr = DAG.getFrameIndex(
5418  FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
5419  SDValue SpillSlot = Ptr;
5420 
5421  // Ensure we generate all stores for each tuple part, whilst updating the
5422  // pointer after each store correctly using vscale.
5423  while (NumParts) {
5424  Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
5425  NumParts--;
5426  if (NumParts > 0) {
5427  SDValue BytesIncrement = DAG.getVScale(
5428  DL, Ptr.getValueType(),
5429  APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5430  SDNodeFlags Flags;
5431  Flags.setNoUnsignedWrap(true);
5432 
5433  MPI = MachinePointerInfo(MPI.getAddrSpace());
5434  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5435  BytesIncrement, Flags);
5436  ExtraArgLocs++;
5437  i++;
5438  }
5439  }
5440 
5441  Arg = SpillSlot;
5442  break;
5443  }
5444 
5445  if (VA.isRegLoc()) {
5446  if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
5447  Outs[0].VT == MVT::i64) {
5448  assert(VA.getLocVT() == MVT::i64 &&
5449  "unexpected calling convention register assignment");
5450  assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
5451  "unexpected use of 'returned'");
5452  IsThisReturn = true;
5453  }
5454  if (RegsUsed.count(VA.getLocReg())) {
5455  // If this register has already been used then we're trying to pack
5456  // parts of an [N x i32] into an X-register. The extension type will
5457  // take care of putting the two halves in the right place but we have to
5458  // combine them.
5459  SDValue &Bits =
5460  llvm::find_if(RegsToPass,
5461  [=](const std::pair<unsigned, SDValue> &Elt) {
5462  return Elt.first == VA.getLocReg();
5463  })
5464  ->second;
5465  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5466  // Call site info is used for function's parameter entry value
5467  // tracking. For now we track only simple cases when parameter
5468  // is transferred through whole register.
5469  llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
5470  return ArgReg.Reg == VA.getLocReg();
5471  });
5472  } else {
5473  RegsToPass.emplace_back(VA.getLocReg(), Arg);
5474  RegsUsed.insert(VA.getLocReg());
5475  const TargetOptions &Options = DAG.getTarget().Options;
5476  if (Options.EmitCallSiteInfo)
5477  CSInfo.emplace_back(VA.getLocReg(), i);
5478  }
5479  } else {
5480  assert(VA.isMemLoc());
5481 
5482  SDValue DstAddr;
5483  MachinePointerInfo DstInfo;
5484 
5485  // FIXME: This works on big-endian for composite byvals, which are the
5486  // common case. It should also work for fundamental types.
5487  uint32_t BEAlign = 0;
5488  unsigned OpSize;
5489  if (VA.getLocInfo() == CCValAssign::Indirect)
5490  OpSize = VA.getLocVT().getFixedSizeInBits();
5491  else
5492  OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
5493  : VA.getValVT().getSizeInBits();
5494  OpSize = (OpSize + 7) / 8;
5495  if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
5496  !Flags.isInConsecutiveRegs()) {
5497  if (OpSize < 8)
5498  BEAlign = 8 - OpSize;
5499  }
5500  unsigned LocMemOffset = VA.getLocMemOffset();
5501  int32_t Offset = LocMemOffset + BEAlign;
5502  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5503  PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5504 
5505  if (IsTailCall) {
5506  Offset = Offset + FPDiff;
5507  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5508 
5509  DstAddr = DAG.getFrameIndex(FI, PtrVT);
5510  DstInfo =
5511  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
5512 
5513  // Make sure any stack arguments overlapping with where we're storing
5514  // are loaded before this eventual operation. Otherwise they'll be
5515  // clobbered.
5516  Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
5517  } else {
5518  SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
5519 
5520  DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
5521  DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
5522  LocMemOffset);
5523  }
5524 
5525  if (Outs[i].Flags.isByVal()) {
5526  SDValue SizeNode =
5527  DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
5528  SDValue Cpy = DAG.getMemcpy(
5529  Chain, DL, DstAddr, Arg, SizeNode,
5530  Outs[i].Flags.getNonZeroByValAlign(),
5531  /*isVol = */ false, /*AlwaysInline = */ false,
5532  /*isTailCall = */ false, DstInfo, MachinePointerInfo());
5533 
5534  MemOpChains.push_back(Cpy);
5535  } else {
5536  // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
5537  // promoted to a legal register type i32, we should truncate Arg back to
5538  // i1/i8/i16.
5539  if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
5540  VA.getValVT() == MVT::i16)
5541  Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
5542 
5543  SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
5544  MemOpChains.push_back(Store);
5545  }
5546  }
5547  }
5548 
5549  if (!MemOpChains.empty())
5550  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5551 
5552  // Build a sequence of copy-to-reg nodes chained together with token chain
5553  // and flag operands which copy the outgoing args into the appropriate regs.
5554  SDValue InFlag;
5555  for (auto &RegToPass : RegsToPass) {
5556  Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
5557  RegToPass.second, InFlag);
5558  InFlag = Chain.getValue(1);
5559  }
5560 
5561  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
5562  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
5563  // node so that legalize doesn't hack it.
5564  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5565  auto GV = G->getGlobal();
5566  unsigned OpFlags =
5567  Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
5568  if (OpFlags & AArch64II::MO_GOT) {
5569  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
5570  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5571  } else {
5572  const GlobalValue *GV = G->getGlobal();
5573  Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
5574  }
5575  } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5576  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5577  Subtarget->isTargetMachO()) {
5578  const char *Sym = S->getSymbol();
5579  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
5580  Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
5581  } else {
5582  const char *Sym = S->getSymbol();
5583  Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
5584  }
5585  }
5586 
5587  // We don't usually want to end the call-sequence here because we would tidy
5588  // the frame up *after* the call, however in the ABI-changing tail-call case
5589  // we've carefully laid out the parameters so that when sp is reset they'll be
5590  // in the correct location.
5591  if (IsTailCall && !IsSibCall) {
5592  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5593  DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
5594  InFlag = Chain.getValue(1);
5595  }
5596 
5597  std::vector<SDValue> Ops;
5598  Ops.push_back(Chain);
5599  Ops.push_back(Callee);
5600 
5601  if (IsTailCall) {
5602  // Each tail call may have to adjust the stack by a different amount, so
5603  // this information must travel along with the operation for eventual
5604  // consumption by emitEpilogue.
5605  Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
5606  }
5607 
5608  // Add argument registers to the end of the list so that they are known live
5609  // into the call.
5610  for (auto &RegToPass : RegsToPass)
5611  Ops.push_back(DAG.getRegister(RegToPass.first,
5612  RegToPass.second.getValueType()));
5613 
5614  // Add a register mask operand representing the call-preserved registers.
5615  const uint32_t *Mask;
5616  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5617  if (IsThisReturn) {
5618  // For 'this' returns, use the X0-preserving mask if applicable
5619  Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
5620  if (!Mask) {
5621  IsThisReturn = false;
5622  Mask = TRI->getCallPreservedMask(MF, CallConv);
5623  }
5624  } else
5625  Mask = TRI->getCallPreservedMask(MF, CallConv);
5626 
5627  if (Subtarget->hasCustomCallingConv())
5628  TRI->UpdateCustomCallPreservedMask(MF, &Mask);
5629 
5630  if (TRI->isAnyArgRegReserved(MF))
5631  TRI->emitReservedArgRegCallError(MF);
5632 
5633  assert(Mask && "Missing call preserved mask for calling convention");
5634  Ops.push_back(DAG.getRegisterMask(Mask));
5635 
5636  if (InFlag.getNode())
5637  Ops.push_back(InFlag);
5638 
5639  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5640 
5641  // If we're doing a tail call, use a TC_RETURN here rather than an
5642  // actual call instruction.
5643  if (IsTailCall) {
5644  MF.getFrameInfo().setHasTailCall();
5645  SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
5646  DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
5647  return Ret;
5648  }
5649 
5650  unsigned CallOpc = AArch64ISD::CALL;
5651  // Calls marked with "rv_marker" are special. They should be expanded to the
5652  // call, directly followed by a special marker sequence. Use the CALL_RVMARKER
5653  // to do that.
5654  if (CLI.CB && CLI.CB->hasRetAttr("rv_marker")) {
5655  assert(!IsTailCall && "tail calls cannot be marked with rv_marker");
5656  CallOpc = AArch64ISD::CALL_RVMARKER;
5657  }
5658 
5659  // Returns a chain and a flag for retval copy to use.
5660  Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
5661  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5662  InFlag = Chain.getValue(1);
5663  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
5664 
5665  uint64_t CalleePopBytes =
5666  DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
5667 
5668  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
5669  DAG.getIntPtrConstant(CalleePopBytes, DL, true),
5670  InFlag, DL);
5671  if (!Ins.empty())
5672  InFlag = Chain.getValue(1);
5673 
5674  // Handle result values, copying them out of physregs into vregs that we
5675  // return.
5676  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
5677  InVals, IsThisReturn,
5678  IsThisReturn ? OutVals[0] : SDValue());
5679 }
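// Illustrative shape of the DAG built by LowerCall for a simple non-tail call
// (simplified sketch, not a literal dump):
//   callseq_start -> CopyToReg(X0, arg0) -> ... ->
//   AArch64ISD::CALL(callee, arg regs, regmask, glue) -> callseq_end ->
//   CopyFromReg(X0) for the return value, handled by LowerCallResult.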
5680 
5681 bool AArch64TargetLowering::CanLowerReturn(
5682  CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
5683  const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5684  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5685  SmallVector<CCValAssign, 16> RVLocs;
5686  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
5687  return CCInfo.CheckReturn(Outs, RetCC);
5688 }
5689 
5690 SDValue
5691 AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
5692  bool isVarArg,
5693  const SmallVectorImpl<ISD::OutputArg> &Outs,
5694  const SmallVectorImpl<SDValue> &OutVals,
5695  const SDLoc &DL, SelectionDAG &DAG) const {
5696  auto &MF = DAG.getMachineFunction();
5697  auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5698 
5699  CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5700  SmallVector<CCValAssign, 16> RVLocs;
5701  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5702  *DAG.getContext());
5703  CCInfo.AnalyzeReturn(Outs, RetCC);
5704 
5705  // Copy the result values into the output registers.
5706  SDValue Flag;
5707  SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
5708  SmallSet<unsigned, 4> RegsUsed;
5709  for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
5710  ++i, ++realRVLocIdx) {
5711  CCValAssign &VA = RVLocs[i];
5712  assert(VA.isRegLoc() && "Can only return in registers!");
5713  SDValue Arg = OutVals[realRVLocIdx];
5714 
5715  switch (VA.getLocInfo()) {
5716  default:
5717  llvm_unreachable("Unknown loc info!");
5718  case CCValAssign::Full:
5719  if (Outs[i].ArgVT == MVT::i1) {
5720  // AAPCS requires i1 to be zero-extended to i8 by the producer of the
5721  // value. This is strictly redundant on Darwin (which uses "zeroext
5722  // i1"), but will be optimised out before ISel.
5723  Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5724  Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5725  }
5726  break;
5727  case CCValAssign::BCvt:
5728  Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
5729  break;
5730  case CCValAssign::AExt:
5731  case CCValAssign::ZExt:
5732  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5733  break;
5734  case CCValAssign::AExtUpper:
5735  assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5736  Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
5737  Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5738  DAG.getConstant(32, DL, VA.getLocVT()));
5739  break;
5740  }
5741 
5742  if (RegsUsed.count(VA.getLocReg())) {
5743  SDValue &Bits =
5744  llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
5745  return Elt.first == VA.getLocReg();
5746  })->second;
5747  Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
5748  } else {
5749  RetVals.emplace_back(VA.getLocReg(), Arg);
5750  RegsUsed.insert(VA.getLocReg());
5751  }
5752  }
5753 
5754  SmallVector<SDValue, 4> RetOps(1, Chain);
5755  for (auto &RetVal : RetVals) {
5756  Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
5757  Flag = Chain.getValue(1);
5758  RetOps.push_back(
5759  DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
5760  }
5761 
5762  // Windows AArch64 ABIs require that for returning structs by value we copy
5763  // the sret argument into X0 for the return.
5764  // We saved the argument into a virtual register in the entry block,
5765  // so now we copy the value out and into X0.
5766  if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
5767  SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
5768  getPointerTy(MF.getDataLayout()));
5769 
5770  unsigned RetValReg = AArch64::X0;
5771  Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
5772  Flag = Chain.getValue(1);
5773 
5774  RetOps.push_back(
5775  DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
5776  }
5777 
5778  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5779  const MCPhysReg *I =
5780  TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
5781  if (I) {
5782  for (; *I; ++I) {
5783  if (AArch64::GPR64RegClass.contains(*I))
5784  RetOps.push_back(DAG.getRegister(*I, MVT::i64));
5785  else if (AArch64::FPR64RegClass.contains(*I))
5786  RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
5787  else
5788  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
5789  }
5790  }
5791 
5792  RetOps[0] = Chain; // Update chain.
5793 
5794  // Add the flag if we have it.
5795  if (Flag.getNode())
5796  RetOps.push_back(Flag);
5797 
5798  return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
5799 }
5800 
5801 //===----------------------------------------------------------------------===//
5802 // Other Lowering Code
5803 //===----------------------------------------------------------------------===//
5804 
5805 SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
5806  SelectionDAG &DAG,
5807  unsigned Flag) const {
5808  return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
5809  N->getOffset(), Flag);
5810 }
5811 
5812 SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
5813  SelectionDAG &DAG,
5814  unsigned Flag) const {
5815  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
5816 }
5817 
5818 SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
5819  SelectionDAG &DAG,
5820  unsigned Flag) const {
5821  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
5822  N->getOffset(), Flag);
5823 }
5824 
5825 SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
5826  SelectionDAG &DAG,
5827  unsigned Flag) const {
5828  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
5829 }
5830 
5831 // (loadGOT sym)
5832 template <class NodeTy>
5833 SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
5834  unsigned Flags) const {
5835  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
5836  SDLoc DL(N);
5837  EVT Ty = getPointerTy(DAG.getDataLayout());
5838  SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
5839  // FIXME: Once remat is capable of dealing with instructions with register
5840  // operands, expand this into two nodes instead of using a wrapper node.
5841  return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
5842 }
5843 
5844 // (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
5845 template <class NodeTy>
5846 SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
5847  unsigned Flags) const {
5848  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
5849  SDLoc DL(N);
5850  EVT Ty = getPointerTy(DAG.getDataLayout());
5851  const unsigned char MO_NC = AArch64II::MO_NC;
5852  return DAG.getNode(
5853  AArch64ISD::WrapperLarge, DL, Ty,
5854  getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
5855  getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
5856  getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
5857  getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
5858 }
5859 
5860 // (addlow (adrp %hi(sym)) %lo(sym))
5861 template <class NodeTy>
5862 SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
5863  unsigned Flags) const {
5864  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
5865  SDLoc DL(N);
5866  EVT Ty = getPointerTy(DAG.getDataLayout());
5867  SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
5868  SDValue Lo = getTargetNode(N, Ty, DAG,
5869  AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
5870  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
5871  return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
5872 }
5873 
5874 // (adr sym)
5875 template <class NodeTy>
5876 SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
5877  unsigned Flags) const {
5878  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
5879  SDLoc DL(N);
5880  EVT Ty = getPointerTy(DAG.getDataLayout());
5881  SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
5882  return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
5883 }
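// Illustrative note (added for exposition; the assembly below is the typical
// ELF expansion of these wrapper nodes, not text taken from this file): for a
// symbol `sym`, the four helpers above roughly correspond to
//   getGOT:       adrp x0, :got:sym        ;  ldr x0, [x0, :got_lo12:sym]
//   getAddrLarge: a movz/movk chain using the :abs_g3: ... :abs_g0_nc: parts
//   getAddr:      adrp x0, sym             ;  add x0, x0, :lo12:sym
//   getAddrTiny:  adr  x0, sym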
5884 
5885 SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
5886  SelectionDAG &DAG) const {
5887  GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
5888  const GlobalValue *GV = GN->getGlobal();
5889  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
5890 
5891  if (OpFlags != AArch64II::MO_NO_FLAG)
5892  assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
5893  "unexpected offset in global node");
5894 
5895  // This also catches the large code model case for Darwin, and tiny code
5896  // model with got relocations.
5897  if ((OpFlags & AArch64II::MO_GOT) != 0) {
5898  return getGOT(GN, DAG, OpFlags);
5899  }
5900 
5901  SDValue Result;
5902  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5903  Result = getAddrLarge(GN, DAG, OpFlags);
5904  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5905  Result = getAddrTiny(GN, DAG, OpFlags);
5906  } else {
5907  Result = getAddr(GN, DAG, OpFlags);
5908  }
5909  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5910  SDLoc DL(GN);
5911  if (OpFlags & AArch64II::MO_DLLIMPORT)
5912  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
5913  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
5914  return Result;
5915 }
5916 
5917 /// Convert a TLS address reference into the correct sequence of loads
5918 /// and calls to compute the variable's address (for Darwin, currently) and
5919 /// return an SDValue containing the final node.
5920 
5921 /// Darwin only has one TLS scheme which must be capable of dealing with the
5922 /// fully general situation, in the worst case. This means:
5923 /// + "extern __thread" declaration.
5924 /// + Defined in a possibly unknown dynamic library.
5925 ///
5926 /// The general system is that each __thread variable has a [3 x i64] descriptor
5927 /// which contains information used by the runtime to calculate the address. The
5928 /// only part of this the compiler needs to know about is the first xword, which
5929 /// contains a function pointer that must be called with the address of the
5930 /// entire descriptor in "x0".
5931 ///
5932 /// Since this descriptor may be in a different unit, in general even the
5933 /// descriptor must be accessed via an indirect load. The "ideal" code sequence
5934 /// is:
5935 /// adrp x0, _var@TLVPPAGE
5936 /// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
5937 /// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
5938 /// ; the function pointer
5939 /// blr x1 ; Uses descriptor address in x0
5940 /// ; Address of _var is now in x0.
5941 ///
5942 /// If the address of _var's descriptor *is* known to the linker, then it can
5943 /// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
5944 /// a slight efficiency gain.
5945 SDValue
5946 AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
5947  SelectionDAG &DAG) const {
5948  assert(Subtarget->isTargetDarwin() &&
5949  "This function expects a Darwin target");
5950 
5951  SDLoc DL(Op);
5952  MVT PtrVT = getPointerTy(DAG.getDataLayout());
5953  MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
5954  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
5955 
5956  SDValue TLVPAddr =
5957  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
5958  SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
5959 
5960  // The first entry in the descriptor is a function pointer that we must call
5961  // to obtain the address of the variable.
5962  SDValue Chain = DAG.getEntryNode();
5963  SDValue FuncTLVGet = DAG.getLoad(
5964  PtrMemVT, DL, Chain, DescAddr,
5965  MachinePointerInfo::getGOT(DAG.getMachineFunction()),
5966  Align(PtrMemVT.getSizeInBits() / 8),
5967  MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
5968  Chain = FuncTLVGet.getValue(1);
5969 
5970  // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
5971  FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
5972 
5973  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5974  MFI.setAdjustsStack(true);
5975 
5976  // TLS calls preserve all registers except those that absolutely must be
5977  // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
5978  // silly).
5979  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5980  const uint32_t *Mask = TRI->getTLSCallPreservedMask();
5981  if (Subtarget->hasCustomCallingConv())
5982  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
5983 
5984  // Finally, we can make the call. This is just a degenerate version of a
5985  // normal AArch64 call node: x0 takes the address of the descriptor, and
5986  // returns the address of the variable in this thread.
5987  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
5988  Chain =
5989  DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
5990  Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
5991  DAG.getRegisterMask(Mask), Chain.getValue(1));
5992  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
5993 }
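// Minimal sketch (added for illustration; the type and field names are
// assumptions based on the comment above, not an LLVM or libSystem
// declaration) of the per-variable [3 x i64] descriptor this lowering calls
// through. Only the first entry is meaningful to the compiler.
struct DarwinTLVDescriptorSketch {
  void *(*Thunk)(DarwinTLVDescriptorSketch *); // called with x0 = &descriptor
  unsigned long Opaque1;                       // runtime-private
  unsigned long Opaque2;                       // runtime-private
};
// The call emitted above is effectively: VarAddr = Desc->Thunk(Desc).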
5994 
5995 /// Convert a thread-local variable reference into a sequence of instructions to
5996 /// compute the variable's address for the local exec TLS model of ELF targets.
5997 /// The sequence depends on the maximum TLS area size.
5998 SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
5999  SDValue ThreadBase,
6000  const SDLoc &DL,
6001  SelectionDAG &DAG) const {
6002  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6003  SDValue TPOff, Addr;
6004 
6005  switch (DAG.getTarget().Options.TLSSize) {
6006  default:
6007  llvm_unreachable("Unexpected TLS size");
6008 
6009  case 12: {
6010  // mrs x0, TPIDR_EL0
6011  // add x0, x0, :tprel_lo12:a
6012  SDValue Var = DAG.getTargetGlobalAddress(
6013  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6014  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6015  Var,
6016  DAG.getTargetConstant(0, DL, MVT::i32)),
6017  0);
6018  }
6019 
6020  case 24: {
6021  // mrs x0, TPIDR_EL0
6022  // add x0, x0, :tprel_hi12:a
6023  // add x0, x0, :tprel_lo12_nc:a
6024  SDValue HiVar = DAG.getTargetGlobalAddress(
6025  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6026  SDValue LoVar = DAG.getTargetGlobalAddress(
6027  GV, DL, PtrVT, 0,
6028  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6029  Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6030  HiVar,
6031  DAG.getTargetConstant(0, DL, MVT::i32)),
6032  0);
6033  return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
6034  LoVar,
6035  DAG.getTargetConstant(0, DL, MVT::i32)),
6036  0);
6037  }
6038 
6039  case 32: {
6040  // mrs x1, TPIDR_EL0
6041  // movz x0, #:tprel_g1:a
6042  // movk x0, #:tprel_g0_nc:a
6043  // add x0, x1, x0
6044  SDValue HiVar = DAG.getTargetGlobalAddress(
6045  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
6046  SDValue LoVar = DAG.getTargetGlobalAddress(
6047  GV, DL, PtrVT, 0,
6048  AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6049  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6050  DAG.getTargetConstant(16, DL, MVT::i32)),
6051  0);
6052  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6053  DAG.getTargetConstant(0, DL, MVT::i32)),
6054  0);
6055  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6056  }
6057 
6058  case 48: {
6059  // mrs x1, TPIDR_EL0
6060  // movz x0, #:tprel_g2:a
6061  // movk x0, #:tprel_g1_nc:a
6062  // movk x0, #:tprel_g0_nc:a
6063  // add x0, x1, x0
6064  SDValue HiVar = DAG.getTargetGlobalAddress(
6065  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
6066  SDValue MiVar = DAG.getTargetGlobalAddress(
6067  GV, DL, PtrVT, 0,
6068  AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
6069  SDValue LoVar = DAG.getTargetGlobalAddress(
6070  GV, DL, PtrVT, 0,
6071  AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6072  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6073  DAG.getTargetConstant(32, DL, MVT::i32)),
6074  0);
6075  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
6076  DAG.getTargetConstant(16, DL, MVT::i32)),
6077  0);
6078  TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6079  DAG.getTargetConstant(0, DL, MVT::i32)),
6080  0);
6081  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6082  }
6083  }
6084 }
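// Worked sizing note (added for clarity; derived from the cases above): the
// TLSSize target option bounds the TP-relative offset the chosen sequence can
// encode, i.e. 12 bits -> 2^12 = 4 KiB, 24 bits -> 2^24 = 16 MiB,
// 32 bits -> 2^32 = 4 GiB, and 48 bits -> 2^48 = 256 TiB of TLS area.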
6085 
6086 /// When accessing thread-local variables under either the general-dynamic or
6087 /// local-dynamic system, we make a "TLS-descriptor" call. The variable will
6088 /// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
6089 /// is a function pointer to carry out the resolution.
6090 ///
6091 /// The sequence is:
6092 /// adrp x0, :tlsdesc:var
6093 /// ldr x1, [x0, #:tlsdesc_lo12:var]
6094 /// add x0, x0, #:tlsdesc_lo12:var
6095 /// .tlsdesccall var
6096 /// blr x1
6097 /// (TPIDR_EL0 offset now in x0)
6098 ///
6099 /// The above sequence must be produced unscheduled, to enable the linker to
6100 /// optimize/relax this sequence.
6101 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
6102  /// above sequence, and is expanded very late in the compilation flow, to
6103  /// ensure the sequence is produced exactly as above.
6104 SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
6105  const SDLoc &DL,
6106  SelectionDAG &DAG) const {
6107  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6108 
6109  SDValue Chain = DAG.getEntryNode();
6110  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6111 
6112  Chain =
6113  DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
6114  SDValue Glue = Chain.getValue(1);
6115 
6116  return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
6117 }
6118 
6119 SDValue
6120 AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
6121  SelectionDAG &DAG) const {
6122  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
6123 
6124  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6125 
6126  TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
6127 
6128  if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
6129  if (Model == TLSModel::LocalDynamic)
6130  Model = TLSModel::GeneralDynamic;
6131  }
6132 
6133  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6134  Model != TLSModel::LocalExec)
6135  report_fatal_error("ELF TLS only supported in small memory model or "
6136  "in local exec TLS model");
6137  // Different choices can be made for the maximum size of the TLS area for a
6138  // module. For the small address model, the default TLS size is 16MiB and the
6139  // maximum TLS size is 4GiB.
6140  // FIXME: add tiny and large code model support for TLS access models other
6141  // than local exec. We currently generate the same code as small for tiny,
6142  // which may be larger than needed.
6143 
6144  SDValue TPOff;
6145  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6146  SDLoc DL(Op);
6147  const GlobalValue *GV = GA->getGlobal();
6148 
6149  SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6150 
6151  if (Model == TLSModel::LocalExec) {
6152  return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
6153  } else if (Model == TLSModel::InitialExec) {
6154  TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6155  TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
6156  } else if (Model == TLSModel::LocalDynamic) {
6157  // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
6158  // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
6159  // the beginning of the module's TLS region, followed by a DTPREL offset
6160  // calculation.
6161 
6162  // These accesses will need deduplicating if there's more than one.
6163  AArch64FunctionInfo *MFI =
6164  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6165  MFI->incNumLocalDynamicTLSAccesses();
6166 
6167  // The call needs a relocation too for linker relaxation. It doesn't make
6168  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6169  // the address.
6170  SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
6171  AArch64II::MO_TLS);
6172 
6173  // Now we can calculate the offset from TPIDR_EL0 to this module's
6174  // thread-local area.
6175  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6176 
6177  // Now use :dtprel_whatever: operations to calculate this variable's offset
6178  // in its thread-storage area.
6179  SDValue HiVar = DAG.getTargetGlobalAddress(
6180  GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6181  SDValue LoVar = DAG.getTargetGlobalAddress(
6182  GV, DL, MVT::i64, 0,
6183  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6184 
6185  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
6186  DAG.getTargetConstant(0, DL, MVT::i32)),
6187  0);
6188  TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
6189  DAG.getTargetConstant(0, DL, MVT::i32)),
6190  0);
6191  } else if (Model == TLSModel::GeneralDynamic) {
6192  // The call needs a relocation too for linker relaxation. It doesn't make
6193  // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6194  // the address.
6195  SDValue SymAddr =
6196  DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6197 
6198  // Finally we can make a call to calculate the offset from tpidr_el0.
6199  TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6200  } else
6201  llvm_unreachable("Unsupported ELF TLS access model");
6202 
6203  return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6204 }
6205 
6206 SDValue
6207 AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
6208  SelectionDAG &DAG) const {
6209  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
6210 
6211  SDValue Chain = DAG.getEntryNode();
6212  EVT PtrVT = getPointerTy(DAG.getDataLayout());
6213  SDLoc DL(Op);
6214 
6215  SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
6216 
6217  // Load the ThreadLocalStoragePointer from the TEB
6218  // A pointer to the TLS array is located at offset 0x58 from the TEB.
6219  SDValue TLSArray =
6220  DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
6221  TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
6222  Chain = TLSArray.getValue(1);
6223 
6224  // Load the TLS index from the C runtime;
6225  // This does the same as getAddr(), but without having a GlobalAddressSDNode.
6226  // This also does the same as LOADgot, but using a generic i32 load,
6227  // while LOADgot only loads i64.
6228  SDValue TLSIndexHi =
6229  DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
6230  SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
6231  "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6232  SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
6233  SDValue TLSIndex =
6234  DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
6235  TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
6236  Chain = TLSIndex.getValue(1);
6237 
6238  // The pointer to the thread's TLS data area is found at the offset
6239  // TLSIndex * 8 into the TLS array.
6240  TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
6241  SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
6242  DAG.getConstant(3, DL, PtrVT));
6243  SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
6244  DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
6245  MachinePointerInfo());
6246  Chain = TLS.getValue(1);
6247 
6248  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6249  const GlobalValue *GV = GA->getGlobal();
6250  SDValue TGAHi = DAG.getTargetGlobalAddress(
6251  GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6252  SDValue TGALo = DAG.getTargetGlobalAddress(
6253  GV, DL, PtrVT, 0,
6254  AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6255 
6256  // Add the offset from the start of the .tls section (section base).
6257  SDValue Addr =
6258  SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
6259  DAG.getTargetConstant(0, DL, MVT::i32)),
6260  0);
6261  Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
6262  return Addr;
6263 }
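// Pseudo-code sketch (added for illustration; not LLVM API, and `var` stands
// for the global being lowered) of the address computed above:
//   char *TEB       = (char *)x18;               // TEB is kept in x18
//   char **TlsArray = *(char ***)(TEB + 0x58);   // ThreadLocalStoragePointer
//   char *TlsBase   = TlsArray[_tls_index];      // this module's TLS block
//   void *VarAddr   = TlsBase + <offset of var within the .tls section>;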
6264 
6265 SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
6266  SelectionDAG &DAG) const {
6267  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6268  if (DAG.getTarget().useEmulatedTLS())
6269  return LowerToTLSEmulatedModel(GA, DAG);
6270 
6271  if (Subtarget->isTargetDarwin())
6272  return LowerDarwinGlobalTLSAddress(Op, DAG);
6273  if (Subtarget->isTargetELF())
6274  return LowerELFGlobalTLSAddress(Op, DAG);
6275  if (Subtarget->isTargetWindows())
6276  return LowerWindowsGlobalTLSAddress(Op, DAG);
6277 
6278  llvm_unreachable("Unexpected platform trying to use TLS");
6279 }
6280 
6281 // Looks through \param Val to determine the bit that can be used to
6282 // check the sign of the value. It returns the unextended value and
6283 // the sign bit position.
6284 std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
6285  if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
6286  return {Val.getOperand(0),
6287  cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
6288  1};
6289 
6290  if (Val.getOpcode() == ISD::SIGN_EXTEND)
6291  return {Val.getOperand(0),
6292  Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
6293 
6294  return {Val, Val.getValueSizeInBits() - 1};
6295 }
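// Usage note (added for clarity): for a value sign-extended from i32, this
// returns the original i32 operand together with bit position 31, so the
// callers below can test the sign with TB(N)Z on the narrower value instead
// of materializing the extension and comparing against zero.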
6296 
6297 SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
6298  SDValue Chain = Op.getOperand(0);
6299  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
6300  SDValue LHS = Op.getOperand(2);
6301  SDValue RHS = Op.getOperand(3);
6302  SDValue Dest = Op.getOperand(4);
6303  SDLoc dl(Op);
6304 
6305  MachineFunction &MF = DAG.getMachineFunction();
6306  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
6307  // will not be produced, as they are conditional branch instructions that do
6308  // not set flags.
6309  bool ProduceNonFlagSettingCondBr =
6310  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
6311 
6312  // Handle f128 first, since lowering it will result in comparing the return
6313  // value of a libcall against zero, which is just what the rest of LowerBR_CC
6314  // is expecting to deal with.
6315  if (LHS.getValueType() == MVT::f128) {
6316  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
6317 
6318  // If softenSetCCOperands returned a scalar, we need to compare the result
6319  // against zero to select between true and false values.
6320  if (!RHS.getNode()) {
6321  RHS = DAG.getConstant(0, dl, LHS.getValueType());
6322  CC = ISD::SETNE;
6323  }
6324  }
6325 
6326  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
6327  // instruction.
6328  if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
6329  (CC == ISD::SETEQ || CC == ISD::SETNE)) {
6330  // Only lower legal XALUO ops.
6331  if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
6332  return SDValue();
6333 
6334  // The actual operation with overflow check.
6335  AArch64CC::CondCode OFCC;
6336  SDValue Value, Overflow;
6337  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
6338 
6339  if (CC == ISD::SETNE)
6340  OFCC = getInvertedCondCode(OFCC);
6341  SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
6342 
6343  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6344  Overflow);
6345  }
6346 
6347  if (LHS.getValueType().isInteger()) {
6348  assert((LHS.getValueType() == RHS.getValueType()) &&
6349  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
6350 
6351  // If the RHS of the comparison is zero, we can potentially fold this
6352  // to a specialized branch.
6353  const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
6354  if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
6355  if (CC == ISD::SETEQ) {
6356  // See if we can use a TBZ to fold in an AND as well.
6357  // TBZ has a smaller branch displacement than CBZ. If the offset is
6358  // out of bounds, a late MI-layer pass rewrites branches.
6359  // 403.gcc is an example that hits this case.
6360  if (LHS.getOpcode() == ISD::AND &&
6361  isa<ConstantSDNode>(LHS.getOperand(1)) &&
6362  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6363  SDValue Test = LHS.getOperand(0);
6364  uint64_t Mask = LHS.getConstantOperandVal(1);
6365  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
6366  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6367  Dest);
6368  }
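// Example (added for illustration): for `if ((x & 0x10) == 0) goto dest`,
// Mask is 0x10 and Log2_64(Mask) is 4, so the fold above emits
// `tbz x<n>, #4, dest` instead of a separate AND followed by CBZ.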
6369 
6370  return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
6371  } else if (CC == ISD::SETNE) {
6372  // See if we can use a TBZ to fold in an AND as well.
6373  // TBZ has a smaller branch displacement than CBZ. If the offset is
6374  // out of bounds, a late MI-layer pass rewrites branches.
6375  // 403.gcc is an example that hits this case.
6376  if (LHS.getOpcode() == ISD::AND &&
6377  isa<ConstantSDNode>(LHS.getOperand(1)) &&
6378  isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6379  SDValue Test = LHS.getOperand(0);
6380  uint64_t Mask = LHS.getConstantOperandVal(1);
6381  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
6382  DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6383  Dest);
6384  }
6385 
6386  return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
6387  } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
6388  // Don't combine AND since emitComparison converts the AND to an ANDS
6389  // (a.k.a. TST) and the test in the test bit and branch instruction
6390  // becomes redundant. This would also increase register pressure.
6391  uint64_t SignBitPos;
6392  std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
6393  return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
6394  DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
6395  }
6396  }
6397  if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
6398  LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
6399  // Don't combine AND since emitComparison converts the AND to an ANDS
6400  // (a.k.a. TST) and the test in the test bit and branch instruction
6401  // becomes redundant. This would also increase register pressure.
6402  uint64_t SignBitPos;
6403  std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
6404  return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
6405  DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
6406  }
6407 
6408  SDValue CCVal;
6409  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
6410  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6411  Cmp);
6412  }
6413 
6414  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
6415  LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
6416 
6417  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
6418  // clean. Some of them require two branches to implement.
6419  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
6420  AArch64CC::CondCode CC1, CC2;
6421  changeFPCCToAArch64CC(CC, CC1, CC2);
6422  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
6423  SDValue BR1 =
6424  DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
6425  if (CC2 != AArch64CC::AL) {
6426  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
6427  return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
6428  Cmp);
6429  }
6430 
6431  return BR1;
6432 }
6433 
6434 SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
6435  SelectionDAG &DAG) const {
6436  EVT VT = Op.getValueType();
6437  SDLoc DL(Op);
6438 
6439  SDValue In1 = Op.getOperand(0);
6440  SDValue In2 = Op.getOperand(1);
6441  EVT SrcVT = In2.getValueType();
6442 
6443  if (SrcVT.bitsLT(VT))
6444  In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
6445  else if (SrcVT.bitsGT(VT))
6446  In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
6447 
6448  EVT VecVT;
6449  uint64_t EltMask;
6450  SDValue VecVal1, VecVal2;
6451 
6452  auto setVecVal = [&] (int Idx) {
6453  if (!VT.isVector()) {
6454  VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
6455  DAG.getUNDEF(VecVT), In1);
6456  VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
6457  DAG.getUNDEF(VecVT), In2);
6458  } else {
6459  VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
6460  VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
6461  }
6462  };
6463 
6464  if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
6465  VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
6466  EltMask = 0x80000000ULL;
6467  setVecVal(AArch64::ssub);
6468  } else if (VT == MVT::f64 || VT == MVT::v2f64) {
6469  VecVT = MVT::v2i64;
6470 
6471  // We want to materialize a mask with the high bit set, but the AdvSIMD
6472  // immediate moves cannot materialize that in a single instruction for
6473  // 64-bit elements. Instead, materialize zero and then negate it.
6474  EltMask = 0;
6475 
6476  setVecVal(AArch64::dsub);
6477  } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
6478  VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
6479  EltMask = 0x8000ULL;
6480  setVecVal(AArch64::hsub);
6481  } else {
6482  llvm_unreachable("Invalid type for copysign!");
6483  }
6484 
6485  SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
6486 
6487  // If we couldn't materialize the mask above, then the mask vector will be
6488  // the zero vector, and we need to negate it here.
6489  if (VT == MVT::f64 || VT == MVT::v2f64) {
6490  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
6491  BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
6492  BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
6493  }
6494 
6495  SDValue Sel =
6496  DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
6497 
6498  if (VT == MVT::f16)
6499  return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
6500  if (VT == MVT::f32)
6501  return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
6502  else if (VT == MVT::f64)
6503  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
6504  else
6505  return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
6506 }
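// Self-contained scalar sketch (added for illustration; copySignViaBitSketch
// is not an LLVM helper) of what the AArch64ISD::BIT selection above computes
// for f64: keep In1's bits everywhere the mask is clear, and take In2's bit
// wherever it is set, i.e. only the sign bit comes from In2.
#include <cstring> // std::memcpy; harmless if already included above

static inline double copySignViaBitSketch(double Mag, double Sgn) {
  unsigned long long MagBits, SgnBits;
  std::memcpy(&MagBits, &Mag, sizeof(double));
  std::memcpy(&SgnBits, &Sgn, sizeof(double));
  const unsigned long long EltMask = 1ULL << 63; // sign bit only
  // BIT Vd, Vn, Vm: insert Vn's bit into Vd wherever Vm has a 1.
  unsigned long long ResBits = (MagBits & ~EltMask) | (SgnBits & EltMask);
  double Res;
  std::memcpy(&Res, &ResBits, sizeof(double));
  return Res;
}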
6507 
6508 SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
6509  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
6510  Attribute::NoImplicitFloat))
6511  return SDValue();
6512 
6513  if (!Subtarget->hasNEON())
6514  return SDValue();
6515 
6516  // While there is no integer popcount instruction, it can
6517  // be more efficiently lowered to the following sequence that uses
6518  // AdvSIMD registers/instructions as long as the copies to/from
6519  // the AdvSIMD registers are cheap.
6520  // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
6521  // CNT V0.8B, V0.8B // 8xbyte pop-counts
6522  // ADDV B0, V0.8B // sum 8xbyte pop-counts
6523  // UMOV X0, V0.B[0] // copy byte result back to integer reg
6524  SDValue Val = Op.getOperand(0);
6525  SDLoc DL(Op);
6526  EVT VT = Op.getValueType();
6527 
6528  if (VT == MVT::i32 || VT == MVT::i64) {
6529  if (VT == MVT::i32)
6530  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
6531  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
6532 
6533  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
6534  SDValue UaddLV = DAG.getNode(
6535  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
6536  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
6537 
6538  if (VT == MVT::i64)
6539  UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
6540  return UaddLV;
6541  } else if (VT == MVT::i128) {
6542  Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
6543 
6544  SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
6545  SDValue UaddLV = DAG.getNode(
6546  ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
6547  DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
6548 
6549  return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
6550  }
6551 
6552  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
6553  return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
6554 
6555  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6556  VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6557  "Unexpected type for custom ctpop lowering");
6558 
6559  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6560  Val = DAG.getBitcast(VT8Bit, Val);
6561  Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
6562 
6563  // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
6564  unsigned EltSize = 8;
6565  unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6566  while (EltSize != VT.getScalarSizeInBits()) {
6567  EltSize *= 2;
6568  NumElts /= 2;
6569  MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6570  Val = DAG.getNode(
6571  ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
6572  DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
6573  }
6574 
6575  return Val;
6576 }
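// Worked widening trace (added for clarity): for VT = v4i32 the loop above
// runs twice, going v16i8 --uaddlp--> v8i16 --uaddlp--> v4i32; EltSize grows
// 8 -> 16 -> 32 while NumElts shrinks 16 -> 8 -> 4, so adjacent byte
// pop-counts are summed pairwise until the element width matches VT.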
6577 
6578 SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
6579  EVT VT = Op.getValueType();
6580  assert(VT.isScalableVector() ||
6581  useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
6582 
6583  SDLoc DL(Op);
6584  SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
6585  return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
6586 }
6587 
6588 SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
6589 
6590  if (Op.getValueType().isVector())
6591  return LowerVSETCC(Op, DAG);
6592 
6593  bool IsStrict = Op->isStrictFPOpcode();
6594  bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
6595  unsigned OpNo = IsStrict ? 1 : 0;
6596  SDValue Chain;
6597  if (IsStrict)
6598  Chain = Op.getOperand(0);
6599  SDValue LHS = Op.getOperand(OpNo + 0);
6600  SDValue RHS = Op.getOperand(OpNo + 1);
6601  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
6602  SDLoc dl(Op);
6603 
6604  // We chose ZeroOrOneBooleanContents, so use zero and one.
6605  EVT VT = Op.getValueType();
6606  SDValue TVal = DAG.getConstant(1, dl, VT);
6607  SDValue FVal = DAG.getConstant(0, dl, VT);
6608 
6609  // Handle f128 first, since one possible outcome is a normal integer
6610  // comparison which gets picked up by the next if statement.
6611  if (LHS.getValueType() == MVT::f128) {
6612  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
6613  IsSignaling);
6614 
6615  // If softenSetCCOperands returned a scalar, use it.
6616  if (!RHS.getNode()) {
6617  assert(LHS.getValueType() == Op.getValueType() &&
6618  "Unexpected setcc expansion!");
6619  return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
6620  }
6621  }
6622 
6623  if (LHS.getValueType().isInteger()) {
6624  SDValue CCVal;
6625  SDValue Cmp = getAArch64Cmp(
6626  LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
6627 
6628  // Note that we inverted the condition above, so we reverse the order of
6629  // the true and false operands here. This will allow the setcc to be
6630  // matched to a single CSINC instruction.
6631  SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
6632  return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
6633  }
6634 
6635  // Now we know we're dealing with FP values.
6636  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
6637  LHS.getValueType() == MVT::f64);
6638 
6639  // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
6640  // and do the comparison.
6641  SDValue Cmp;
6642  if (IsStrict)
6643  Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
6644  else
6645  Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
6646 
6647  AArch64CC::CondCode CC1, CC2;
6648  changeFPCCToAArch64CC(CC, CC1, CC2);
6649  SDValue Res;
6650  if (CC2 == AArch64CC::AL) {
6651  changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
6652  CC2);
6653  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
6654 
6655  // Note that we inverted the condition above, so we reverse the order of
6656  // the true and false operands here. This will allow the setcc to be
6657  // matched to a single CSINC instruction.
6658  Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
6659  } else {
6660  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
6661  // totally clean. Some of them require two CSELs to implement. As is in
6662  // this case, we emit the first CSEL and then emit a second using the output
6663  // of the first as the RHS. We're effectively OR'ing the two CC's together.
6664 
6665  // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
6666  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
6667  SDValue CS1 =
6668  DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
6669 
6670  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
6671  Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
6672  }
6673  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
6674 }
6675 
6676 SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
6677  SDValue RHS, SDValue TVal,
6678  SDValue FVal, const SDLoc &dl,
6679  SelectionDAG &DAG) const {
6680  // Handle f128 first, because it will result in a comparison of some RTLIB
6681  // call result against zero.
6682  if (LHS.getValueType() == MVT::f128) {
6683  softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
6684 
6685  // If softenSetCCOperands returned a scalar, we need to compare the result
6686  // against zero to select between true and false values.
6687  if (!RHS.getNode()) {
6688  RHS = DAG.getConstant(0, dl, LHS.getValueType());
6689  CC = ISD::SETNE;
6690  }
6691  }
6692 
6693  // Also handle f16, for which we need to do a f32 comparison.
6694  if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
6695  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
6696  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
6697  }
6698 
6699  // Next, handle integers.
6700  if (LHS.getValueType().isInteger()) {
6701  assert((LHS.getValueType() == RHS.getValueType()) &&
6702  (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
6703 
6704  unsigned Opcode = AArch64ISD::CSEL;
6705 
6706  // If both the TVal and the FVal are constants, see if we can swap them in
6707  // order to form a CSINV or CSINC out of them.
6708  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
6709  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
6710 
6711  if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
6712  std::swap(TVal, FVal);
6713  std::swap(CTVal, CFVal);
6714  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
6715  } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
6716  std::swap(TVal, FVal);
6717  std::swap(CTVal, CFVal);
6718  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
6719  } else if (TVal.getOpcode() == ISD::XOR) {
6720  // If TVal is a NOT we want to swap TVal and FVal so that we can match
6721  // with a CSINV rather than a CSEL.
6722  if (isAllOnesConstant(TVal.getOperand(1))) {
6723  std::swap(TVal, FVal);
6724  std::swap(CTVal, CFVal);
6725  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
6726  }
6727  } else if (TVal.getOpcode() == ISD::SUB) {
6728  // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
6729  // that we can match with a CSNEG rather than a CSEL.
6730  if (isNullConstant(TVal.getOperand(0))) {
6731  std::swap(TVal, FVal);
6732  std::swap(CTVal, CFVal);
6733  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
6734  }
6735  } else if (CTVal && CFVal) {
6736  const int64_t TrueVal = CTVal->getSExtValue();
6737  const int64_t FalseVal = CFVal->getSExtValue();
6738  bool Swap = false;
6739 
6740  // If both TVal and FVal are constants, see if FVal is the
6741  // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
6742  // instead of a CSEL in that case.
6743  if (TrueVal == ~FalseVal) {
6744  Opcode = AArch64ISD::CSINV;
6745  } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
6746  TrueVal == -FalseVal) {
6747  Opcode = AArch64ISD::CSNEG;
6748  } else if (TVal.getValueType() == MVT::i32) {
6749  // If our operands are only 32-bit wide, make sure we use 32-bit
6750  // arithmetic for the check whether we can use CSINC. This ensures that
6751  // the addition in the check will wrap around properly in case there is
6752  // an overflow (which would not be the case if we do the check with
6753  // 64-bit arithmetic).
6754  const uint32_t TrueVal32 = CTVal->getZExtValue();
6755  const uint32_t FalseVal32 = CFVal->getZExtValue();
6756 
6757  if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
6758  Opcode = AArch64ISD::CSINC;
6759 
6760  if (TrueVal32 > FalseVal32) {
6761  Swap = true;
6762  }
6763  }
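// Worked example (added for clarity): TVal = INT32_MIN, FVal = INT32_MAX.
// In 32-bit arithmetic FalseVal32 + 1 wraps to 0x80000000 == TrueVal32, so a
// W-register CSINC is valid here, while the sign-extended 64-bit check below
// would miss it (2147483647 + 1 != -2147483648 without wrap-around).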
6764  // 64-bit check whether we can use CSINC.
6765  } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
6766  Opcode = AArch64ISD::CSINC;
6767 
6768  if (TrueVal > FalseVal) {
6769  Swap = true;
6770  }
6771  }
6772 
6773  // Swap TVal and FVal if necessary.
6774  if (Swap) {
6775  std::swap(TVal, FVal);
6776  std::swap(CTVal, CFVal);
6777  CC = ISD::getSetCCInverse(CC, LHS.getValueType());
6778  }
6779 
6780  if (Opcode != AArch64ISD::CSEL) {
6781  // Drop FVal since we can get its value by simply inverting/negating
6782  // TVal.
6783  FVal = TVal;
6784  }
6785  }
6786 
6787  // Avoid materializing a constant when possible by reusing a known value in
6788  // a register. However, don't perform this optimization if the known value
6789  // is one, zero or negative one in the case of a CSEL. We can always
6790  // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
6791  // FVal, respectively.
6792  ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
6793  if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
6794  !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
6795  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
6796  // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
6797  // "a != C ? x : a" to avoid materializing C.
6798  if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
6799  TVal = LHS;
6800  else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
6801  FVal = LHS;
6802  } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
6803  assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
6804  // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
6805  // avoid materializing C.
6806  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
6807  if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
6808  Opcode = AArch64ISD::CSINV;
6809  TVal = LHS;
6810  FVal = DAG.getConstant(0, dl, FVal.getValueType());
6811  }
6812  }
6813 
6814  SDValue CCVal;
6815  SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
6816  EVT VT = TVal.getValueType();
6817  return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
6818  }
6819 
6820  // Now we know we're dealing with FP values.
6821  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
6822  LHS.getValueType() == MVT::f64);
6823  assert(LHS.getValueType() == RHS.getValueType());
6824  EVT VT = TVal.getValueType();
6825  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
6826 
6827  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
6828  // clean. Some of them require two CSELs to implement.
6829  AArch64CC::CondCode CC1, CC2;
6830  changeFPCCToAArch64CC(CC, CC1, CC2);
6831 
6832  if (DAG.getTarget().Options.UnsafeFPMath) {
6833  // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
6834  // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
6835  ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
6836  if (RHSVal && RHSVal->isZero()) {
6837  ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
6838  ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
6839 
6840  if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
6841  CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
6842  TVal = LHS;
6843  else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
6844  CFVal && CFVal->isZero() &&
6845  FVal.getValueType() == LHS.getValueType())
6846  FVal = LHS;
6847  }
6848  }
6849 
6850  // Emit first, and possibly only, CSEL.
6851  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
6852  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
6853 
6854  // If we need a second CSEL, emit it, using the output of the first as the
6855  // RHS. We're effectively OR'ing the two CC's together.
6856  if (CC2 != AArch64CC::AL) {
6857  SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
6858  return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
6859  }
6860 
6861  // Otherwise, return the output of the first CSEL.
6862  return CS1;
6863 }
6864 
6865 SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
6866  SelectionDAG &DAG) const {
6867  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
6868  SDValue LHS = Op.getOperand(0);
6869  SDValue RHS = Op.getOperand(1);
6870  SDValue TVal = Op.getOperand(2);
6871  SDValue FVal = Op.getOperand(3);
6872  SDLoc DL(Op);
6873  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
6874 }
6875 
6876 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
6877  SelectionDAG &DAG) const {
6878  SDValue CCVal = Op->getOperand(0);
6879  SDValue TVal = Op->getOperand(1);
6880  SDValue FVal = Op->getOperand(2);
6881  SDLoc DL(Op);
6882 
6883  EVT Ty = Op.getValueType();
6884  if (Ty.isScalableVector()) {
6885  SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
6886  MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
6887  SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
6888  return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
6889  }
6890 
6891  // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
6892  // instruction.
6893  if (ISD::isOverflowIntrOpRes(CCVal)) {
6894  // Only lower legal XALUO ops.
6895  if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
6896  return SDValue();
6897 
6898  AArch64CC::CondCode OFCC;
6899  SDValue Value, Overflow;
6900  std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
6901  SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
6902 
6903  return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
6904  CCVal, Overflow);
6905  }
6906 
6907  // Lower it the same way as we would lower a SELECT_CC node.
6908  ISD::CondCode CC;
6909  SDValue LHS, RHS;
6910  if (CCVal.getOpcode() == ISD::SETCC) {
6911  LHS = CCVal.getOperand(0);
6912  RHS = CCVal.getOperand(1);
6913  CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
6914  } else {
6915  LHS = CCVal;
6916  RHS = DAG.getConstant(0, DL, CCVal.getValueType());
6917  CC = ISD::SETNE;
6918  }
6919  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
6920 }
6921 
6922 SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
6923  SelectionDAG &DAG) const {
6924  // Jump table entries are emitted as PC-relative offsets. No additional
6925  // tweaking is necessary here. Just get the address of the jump table.
6926  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
6927 
6928  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6929  !Subtarget->isTargetMachO()) {
6930  return getAddrLarge(JT, DAG);
6931  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6932  return getAddrTiny(JT, DAG);
6933  }
6934  return getAddr(JT, DAG);
6935 }
6936 
6937 SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
6938  SelectionDAG &DAG) const {
6939  // Jump table entries are emitted as PC-relative offsets. No additional
6940  // tweaking is necessary here. Just get the address of the jump table.
6941  SDLoc DL(Op);
6942  SDValue JT = Op.getOperand(1);
6943  SDValue Entry = Op.getOperand(2);
6944  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
6945 
6946  auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6947  AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
6948 
6949  SDNode *Dest =
6950  DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
6951  Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
6952  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
6953  SDValue(Dest, 0));
6954 }
6955 
6956 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
6957  SelectionDAG &DAG) const {
6958  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
6959 
6960  if (getTargetMachine().getCodeModel() == CodeModel::Large) {
6961  // Use the GOT for the large code model on iOS.
6962  if (Subtarget->isTargetMachO()) {
6963  return getGOT(CP, DAG);
6964  }
6965  return getAddrLarge(CP, DAG);
6966  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6967  return getAddrTiny(CP, DAG);
6968  } else {
6969  return getAddr(CP, DAG);
6970  }
6971 }
6972 
6973 SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
6974  SelectionDAG &DAG) const {
6975  BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
6976  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6977  !Subtarget->isTargetMachO()) {
6978  return getAddrLarge(BA, DAG);
6979  } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6980  return getAddrTiny(BA, DAG);
6981  }
6982  return getAddr(BA, DAG);
6983 }
6984 
6985 SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
6986  SelectionDAG &DAG) const {
6987  AArch64FunctionInfo *FuncInfo =
6988  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6989 
6990  SDLoc DL(Op);
6991  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
6992  getPointerTy(DAG.getDataLayout()));
6993  FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
6994  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
6995  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
6996  MachinePointerInfo(SV));
6997 }
6998 
6999 SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
7000  SelectionDAG &DAG) const {
7001  AArch64FunctionInfo *FuncInfo =
7002  DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7003 
7004  SDLoc DL(Op);
7005  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
7006  ? FuncInfo->getVarArgsGPRIndex()
7007  : FuncInfo->getVarArgsStackIndex(),
7008  getPointerTy(DAG.getDataLayout()));
7009  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7010  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7011  MachinePointerInfo(SV));
7012 }
7013 
7014 SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
7015  SelectionDAG &DAG) const {
7016  // The layout of the va_list struct is specified in the AArch64 Procedure Call
7017  // Standard, section B.3.
7018  MachineFunction &MF = DAG.getMachineFunction();
7019  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7020  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7021  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7022  auto PtrVT = getPointerTy(DAG.getDataLayout());
7023  SDLoc DL(Op);
7024 
7025  SDValue Chain = Op.getOperand(0);
7026  SDValue VAList = Op.getOperand(1);
7027  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7028  SmallVector<SDValue, 4> MemOps;
7029 
7030  // void *__stack at offset 0
7031  unsigned Offset = 0;
7032  SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
7033  Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
7034  MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
7035  MachinePointerInfo(SV), Align(PtrSize)));
7036 
7037  // void *__gr_top at offset 8 (4 on ILP32)
7038  Offset += PtrSize;
7039  int GPRSize = FuncInfo->getVarArgsGPRSize();
7040  if (GPRSize > 0) {
7041  SDValue GRTop, GRTopAddr;
7042 
7043  GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7044  DAG.getConstant(Offset, DL, PtrVT));
7045 
7046  GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
7047  GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
7048  DAG.getConstant(GPRSize, DL, PtrVT));
7049  GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
7050 
7051  MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
7052  MachinePointerInfo(SV, Offset),
7053  Align(PtrSize)));
7054  }
7055 
7056  // void *__vr_top at offset 16 (8 on ILP32)
7057  Offset += PtrSize;
7058  int FPRSize = FuncInfo->getVarArgsFPRSize();
7059  if (FPRSize > 0) {
7060  SDValue VRTop, VRTopAddr;
7061  VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7062  DAG.getConstant(Offset, DL, PtrVT));
7063 
7064  VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
7065  VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
7066  DAG.getConstant(FPRSize, DL, PtrVT));
7067  VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
7068 
7069  MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
7070  MachinePointerInfo(SV, Offset),
7071  Align(PtrSize)));
7072  }
7073 
7074  // int __gr_offs at offset 24 (12 on ILP32)
7075  Offset += PtrSize;
7076  SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7077  DAG.getConstant(Offset, DL, PtrVT));
7078  MemOps.push_back(
7079  DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
7080  GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7081 
7082  // int __vr_offs at offset 28 (16 on ILP32)
7083  Offset += 4;
7084  SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7085  DAG.getConstant(Offset, DL, PtrVT));
7086  MemOps.push_back(
7087  DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
7088  VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7089 
7090  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
7091 }
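// Illustrative declaration (added as an exposition aid; this struct is not an
// LLVM or libc type) of the LP64 va_list layout the stores above populate,
// matching AAPCS64 section B.3. On ILP32 the three pointers shrink to 4
// bytes, giving the alternative offsets noted in the comments above.
struct AAPCS64VaListSketch {
  void *__stack;  // offset 0:  next overflow argument on the stack
  void *__gr_top; // offset 8:  byte past the saved general-register save area
  void *__vr_top; // offset 16: byte past the saved FP/SIMD register save area
  int __gr_offs;  // offset 24: negative offset from __gr_top (-GPRSize)
  int __vr_offs;  // offset 28: negative offset from __vr_top (-FPRSize)
};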
7092 
7093 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
7094  SelectionDAG &DAG) const {
7095  MachineFunction &MF = DAG.getMachineFunction();
7096 
7097  if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
7098  return LowerWin64_VASTART(Op, DAG);
7099  else if (Subtarget->isTargetDarwin())
7100  return LowerDarwin_VASTART(Op, DAG);
7101  else
7102  return LowerAAPCS_VASTART(Op, DAG);
7103 }
7104 
7105 SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
7106  SelectionDAG &DAG) const {
7107  // AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
7108  // pointer.
7109  SDLoc DL(Op);
7110  unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7111  unsigned VaListSize =
7112  (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7113  ? PtrSize
7114  : Subtarget->isTargetILP32() ? 20 : 32;
7115  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
7116  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7117 
7118  return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
7119  DAG.getConstant(VaListSize, DL, MVT::i32),
7120  Align(PtrSize), false, false, false,
7121  MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
7122 }
7123 
7124 SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
7125  assert(Subtarget->isTargetDarwin() &&
7126  "automatic va_arg instruction only works on Darwin");
7127 
7128  const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7129  EVT VT = Op.getValueType();
7130  SDLoc DL(Op);
7131  SDValue Chain = Op.getOperand(0);
7132  SDValue Addr = Op.getOperand(1);
7133  MaybeAlign Align(Op.getConstantOperandVal(3));
7134  unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
7135  auto PtrVT = getPointerTy(DAG.getDataLayout());
7136  auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7137  SDValue VAList =
7138  DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
7139  Chain = VAList.getValue(1);
7140  VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
7141 
7142  if (VT.isScalableVector())
7143  report_fatal_error("Passing SVE types to variadic functions is "
7144  "currently not supported");
7145 
7146  if (Align && *Align > MinSlotSize) {
7147  VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7148  DAG.getConstant(Align->value() - 1, DL, PtrVT));
7149  VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
7150  DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
7151  }
7152 
7153  Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
7154  unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
7155 
7156  // Scalar integer and FP values smaller than 64 bits are implicitly extended
7157  // up to 64 bits. At the very least, we have to increase the striding of the
7158  // vaargs list to match this, and for FP values we need to introduce
7159  // FP_ROUND nodes as well.
7160  if (VT.isInteger() && !VT.isVector())
7161  ArgSize = std::max(ArgSize, MinSlotSize);
7162  bool NeedFPTrunc = false;
7163  if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
7164  ArgSize = 8;
7165  NeedFPTrunc = true;
7166  }
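// Worked example (added for clarity; not in the original source): for
// va_arg(ap, float) on Darwin, ArgSize stays 8 and NeedFPTrunc is set, so
// the code below loads the slot as an f64 and FP_ROUNDs it to f32 before
// advancing VAList by the full 8-byte slot.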
7167 
7168  // Increment the pointer, VAList, to the next vaarg
7169  SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7170  DAG.getConstant(ArgSize, DL, PtrVT));
7171  VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
7172 
7173  // Store the incremented VAList to the legalized pointer
7174  SDValue APStore =
7175  DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
7176 
7177  // Load the actual argument out of the pointer VAList
7178  if (NeedFPTrunc) {
7179  // Load the value as an f64.
7180  SDValue WideFP =
7181  DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
7182  // Round the value down to an f32.
7183  SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
7184  DAG.getIntPtrConstant(1, DL));
7185  SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
7186  // Merge the rounded value with the chain output of the load.
7187  return DAG.getMergeValues(Ops, DL);
7188  }
7189 
7190  return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
7191 }
7192 
7193 SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
7194  SelectionDAG &DAG) const {
7195  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7196  MFI.setFrameAddressIsTaken(true);
7197 
7198  EVT VT = Op.getValueType();
7199  SDLoc DL(Op);
7200  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7201  SDValue FrameAddr =
7202  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
7203  while (Depth--)
7204  FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
7205  MachinePointerInfo());
7206 
7207  if (Subtarget->isTargetILP32())
7208  FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
7209  DAG.getValueType(VT));
7210 
7211  return FrameAddr;
7212 }
7213 
7214 SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
7215  SelectionDAG &DAG) const {
7216  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7217 
7218  EVT VT = getPointerTy(DAG.getDataLayout());
7219  SDLoc DL(Op);
7220  int FI = MFI.CreateFixedObject(4, 0, false);
7221  return DAG.getFrameIndex(FI, VT);
7222 }
7223 
7224 #define GET_REGISTER_MATCHER
7225 #include "AArch64GenAsmMatcher.inc"
7226 
7227 // FIXME? Maybe this could be a TableGen attribute on some registers and
7228 // this table could be generated automatically from RegInfo.
7229 Register AArch64TargetLowering::
7230 getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
7231  Register Reg = MatchRegisterName(RegName);
7232  if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
7233  const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
7234  unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
7235  if (!Subtarget->isXRegisterReserved(DwarfRegNum))
7236  Reg = 0;
7237  }
7238  if (Reg)
7239  return Reg;
7240  report_fatal_error(Twine("Invalid register name \""
7241  + StringRef(RegName) + "\"."));
7242 }
7243 
7244 SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
7245  SelectionDAG &DAG) const {
7246  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
7247 
7248  EVT VT = Op.getValueType();
7249  SDLoc DL(Op);
7250 
7251  SDValue FrameAddr =
7252  DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
7253  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
7254 
7255  return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
7256 }
7257 
7258 SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
7259  SelectionDAG &DAG) const {
7260  MachineFunction &MF = DAG.getMachineFunction();
7261  MachineFrameInfo &MFI = MF.getFrameInfo();
7262  MFI.setReturnAddressIsTaken(true);
7263 
7264  EVT VT = Op.getValueType();
7265  SDLoc DL(Op);
7266  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7267  SDValue ReturnAddress;
7268  if (Depth) {
7269  SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
7270  SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
7271  ReturnAddress = DAG.getLoad(
7272  VT, DL, DAG.getEntryNode(),
7273  DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
7274  } else {
7275  // Return LR, which contains the return address. Mark it an implicit
7276  // live-in.
7277  unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
7278  ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
7279  }
7280 
7281  // The XPACLRI instruction assembles to a hint-space instruction before
7282  // Armv8.3-A, and can therefore be used safely on any pre-Armv8.3-A
7283  // architecture. On Armv8.3-A and onwards XPACI is available, so use that
7284  // instead.
7285  SDNode *St;
7286  if (Subtarget->hasPAuth()) {
7287  St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
7288  } else {
7289  // XPACLRI operates on LR therefore we must move the operand accordingly.
7290  SDValue Chain =
7291  DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
7292  St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
7293  }
7294  return SDValue(St, 0);
7295 }
7296 
7297 /// LowerShiftRightParts - Lower SRA_PARTS/SRL_PARTS, which return two
7298 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
7299 SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
7300  SelectionDAG &DAG) const {
7301  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
7302  EVT VT = Op.getValueType();
7303  unsigned VTBits = VT.getSizeInBits();
7304  SDLoc dl(Op);
7305  SDValue ShOpLo = Op.getOperand(0);
7306  SDValue ShOpHi = Op.getOperand(1);
7307  SDValue ShAmt = Op.getOperand(2);
7308  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
7309 
7310  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
7311 
7312  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
7313  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
7314  SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
7315 
7316  // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
7317  // is "undef". We wanted 0, so CSEL it directly.
7318  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
7319  ISD::SETEQ, dl, DAG);
7320  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
7321  HiBitsForLo =
7322  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
7323  HiBitsForLo, CCVal, Cmp);
7324 
7325  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
7326  DAG.getConstant(VTBits, dl, MVT::i64));
7327 
7328  SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
7329  SDValue LoForNormalShift =
7330  DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
7331 
7332  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
7333  dl, DAG);
7334  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
7335  SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
7336  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
7337  LoForNormalShift, CCVal, Cmp);
7338 
7339  // AArch64 shifts larger than the register width are wrapped rather than
7340  // clamped, so we can't just emit "hi >> x".
7341  SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
7342  SDValue HiForBigShift =
7343  Opc == ISD::SRA
7344  ? DAG.getNode(Opc, dl, VT, ShOpHi,
7345  DAG.getConstant(VTBits - 1, dl, MVT::i64))
7346  : DAG.getConstant(0, dl, VT);
7347  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
7348  HiForNormalShift, CCVal, Cmp);
7349 
7350  SDValue Ops[2] = { Lo, Hi };
7351  return DAG.getMergeValues(Ops, dl);
7352 }
7353 
7354 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
7355 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
7356 SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
7357  SelectionDAG &DAG) const {
7358  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
7359  EVT VT = Op.getValueType();
7360  unsigned VTBits = VT.getSizeInBits();
7361  SDLoc dl(Op);
7362  SDValue ShOpLo = Op.getOperand(0);
7363  SDValue ShOpHi = Op.getOperand(1);
7364  SDValue ShAmt = Op.getOperand(2);
7365 
7366  assert(Op.getOpcode() == ISD::SHL_PARTS);
7367  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
7368  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
7369  SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
7370 
7371  // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
7372  // is "undef". We wanted 0, so CSEL it directly.
7373  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
7374  ISD::SETEQ, dl, DAG);
7375  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
7376  LoBitsForHi =
7377  DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
7378  LoBitsForHi, CCVal, Cmp);
7379 
7380  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
7381  DAG.getConstant(VTBits, dl, MVT::i64));
7382  SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
7383  SDValue HiForNormalShift =
7384  DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
7385 
7386  SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
7387 
7388  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
7389  dl, DAG);
7390  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
7391  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
7392  HiForNormalShift, CCVal, Cmp);
7393 
7394  // AArch64 shifts of larger than register sizes are wrapped rather than
7395  // clamped, so we can't just emit "lo << a" if a is too big.
7396  SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
7397  SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
7398  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
7399  LoForNormalShift, CCVal, Cmp);
7400 
7401  SDValue Ops[2] = { Lo, Hi };
7402  return DAG.getMergeValues(Ops, dl);
7403 }
7404 
7405 bool AArch64TargetLowering::isOffsetFoldingLegal(
7406  const GlobalAddressSDNode *GA) const {
7407  // Offsets are folded in the DAG combine rather than here so that we can
7408  // intelligently choose an offset based on the uses.
7409  return false;
7410 }
7411 
7412 bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7413  bool OptForSize) const {
7414  bool IsLegal = false;
7415  // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
7416  // 16-bit case when target has full fp16 support.
7417  // FIXME: We should be able to handle f128 as well with a clever lowering.
7418  const APInt ImmInt = Imm.bitcastToAPInt();
7419  if (VT == MVT::f64)
7420  IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
7421  else if (VT == MVT::f32)
7422  IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
7423  else if (VT == MVT::f16 && Subtarget->hasFullFP16())
7424  IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
7425  // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
7426  // generate that fmov.
7427 
7428  // If we cannot materialize the value in the fmov immediate field, check if the
7429  // value can be encoded as the immediate operand of a logical instruction.
7430  // The immediate value will be created with either MOVZ, MOVN, or ORR.
7431  if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
7432  // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
7433  // however the mov+fmov sequence is always better because of the reduced
7434  // cache pressure. The timings are still the same if you consider
7435  // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
7436  // movw+movk is fused). So we limit it to at most 2 instructions.
7437  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7438  AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
7439  Insn);
7440  unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
7441  IsLegal = Insn.size() <= Limit;
7442  }
7443 
7444  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
7445  << " imm value: "; Imm.dump(););
7446  return IsLegal;
7447 }
7448 
7449 //===----------------------------------------------------------------------===//
7450 // AArch64 Optimization Hooks
7451 //===----------------------------------------------------------------------===//
7452 
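/// getEstimate - Emit an FRECPE/FRSQRTE estimate node for \p Operand when
/// NEON is available for its (scalar or vector) FP type, and, if the caller
/// left ExtraSteps unspecified, report how many Newton-Raphson refinement
/// steps are needed for the type's precision.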
7453 static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
7454  SDValue Operand, SelectionDAG &DAG,
7455  int &ExtraSteps) {
7456  EVT VT = Operand.getValueType();
7457  if (ST->hasNEON() &&
7458  (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
7459  VT == MVT::f32 || VT == MVT::v1f32 ||
7460  VT == MVT::v2f32 || VT == MVT::v4f32)) {
7461  if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
7462  // For the reciprocal estimates, convergence is quadratic, so the number
7463  // of digits is doubled after each iteration. In ARMv8, the accuracy of
7464  // the initial estimate is 2^-8. Thus the number of extra steps to refine
7465  // the result for float (23 mantissa bits) is 2 and for double (52
7466  // mantissa bits) is 3.
7467  ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
7468 
7469  return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
7470  }
7471 
7472  return SDValue();
7473 }
7474 
7475 SDValue
7476 AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7477  const DenormalMode &Mode) const {
7478  SDLoc DL(Op);
7479  EVT VT = Op.getValueType();
7480  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7481  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7482  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7483 }
7484 
7485 SDValue
7486 AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
7487  SelectionDAG &DAG) const {
7488  return Op;
7489 }
7490 
7491 SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
7492  SelectionDAG &DAG, int Enabled,
7493  int &ExtraSteps,
7494  bool &UseOneConst,
7495  bool Reciprocal) const {
7496  if (Enabled == ReciprocalEstimate::Enabled ||
7497  (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
7498  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
7499  DAG, ExtraSteps)) {
7500  SDLoc DL(Operand);
7501  EVT VT = Operand.getValueType();
7502 
7503  SDNodeFlags Flags;
7504  Flags.setAllowReassociation(true);
7505 
7506  // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
7507  // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
7508  for (int i = ExtraSteps; i > 0; --i) {
7509  SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
7510  Flags);
7511  Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
7512  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
7513  }
7514  if (!Reciprocal)
7515  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
7516 
7517  ExtraSteps = 0;
7518  return Estimate;
7519  }
7520 
7521  return SDValue();
7522 }
7523 
7524 SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
7525  SelectionDAG &DAG, int Enabled,
7526  int &ExtraSteps) const {
7527  if (Enabled == ReciprocalEstimate::Enabled)
7528  if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
7529  DAG, ExtraSteps)) {
7530  SDLoc DL(Operand);
7531  EVT VT = Operand.getValueType();
7532 
7533  SDNodeFlags Flags;
7534  Flags.setAllowReassociation(true);
7535 
7536  // Newton reciprocal iteration: E * (2 - X * E)
7537  // AArch64 reciprocal iteration instruction: (2 - M * N)
7538  for (int i = ExtraSteps; i > 0; --i) {
7539  SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
7540  Estimate, Flags);
7541  Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
7542  }
7543 
7544  ExtraSteps = 0;
7545  return Estimate;
7546  }
7547 
7548  return SDValue();
7549 }
7550 
7551 //===----------------------------------------------------------------------===//
7552 // AArch64 Inline Assembly Support
7553 //===----------------------------------------------------------------------===//
7554 
7555 // Table of Constraints
7556 // TODO: This is the current set of constraints supported by ARM for the
7557 // compiler; not all of them may make sense.
7558 //
7559 // r - A general register
7560 // w - An FP/SIMD register of some size in the range v0-v31
7561 // x - An FP/SIMD register of some size in the range v0-v15
7562 // I - Constant that can be used with an ADD instruction
7563 // J - Constant that can be used with a SUB instruction
7564 // K - Constant that can be used with a 32-bit logical instruction
7565 // L - Constant that can be used with a 64-bit logical instruction
7566 // M - Constant that can be used as a 32-bit MOV immediate
7567 // N - Constant that can be used as a 64-bit MOV immediate
7568 // Q - A memory reference with base register and no offset
7569 // S - A symbolic address
7570 // Y - Floating point constant zero
7571 // Z - Integer constant zero
7572 //
7573 // Note that general register operands will be output using their 64-bit x
7574 // register name, whatever the size of the variable, unless the asm operand
7575 // is prefixed by the %w modifier. Floating-point and SIMD register operands
7576 // will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
7577 // %q modifier.
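//
// Illustrative example (not from the original source): a 32-bit FP add that
// uses the "w" constraint together with the %s operand modifier could be
// written as
//   asm("fadd %s0, %s1, %s2" : "=w"(res) : "w"(a), "w"(b));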
7578 const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
7579  // At this point, we have to lower this constraint to something else, so we
7580  // lower it to an "r" or "w". However, by doing this we will force the result
7581  // to be in register, while the X constraint is much more permissive.
7582  //
7583  // Although we are correct (we are free to emit anything, without
7584  // constraints), we might break use cases that would expect us to be more
7585  // efficient and emit something else.
7586  if (!Subtarget->hasFPARMv8())
7587  return "r";
7588 
7589  if (ConstraintVT.isFloatingPoint())
7590  return "w";
7591 
7592  if (ConstraintVT.isVector() &&
7593  (ConstraintVT.getSizeInBits() == 64 ||
7594  ConstraintVT.getSizeInBits() == 128))
7595  return "w";
7596 
7597  return "r";
7598 }
7599 
7600 enum PredicateConstraint {
7601  Upl,
7602  Upa,
7603  Invalid
7604 };
7605 
7606 static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
7607  PredicateConstraint P = PredicateConstraint::Invalid;
7608  if (Constraint == "Upa")
7609  P = PredicateConstraint::Upa;
7610  if (Constraint == "Upl")
7611  P = PredicateConstraint::Upl;
7612  return P;
7613 }
7614 
7615 /// getConstraintType - Given a constraint letter, return the type of
7616 /// constraint it is for this target.
7617 AArch64TargetLowering::ConstraintType
7618 AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
7619  if (Constraint.size() == 1) {
7620  switch (Constraint[0]) {
7621  default:
7622  break;
7623  case 'x':
7624  case 'w':
7625  case 'y':
7626  return C_RegisterClass;
7627  // An address with a single base register. Due to the way we
7628  // currently handle addresses it is the same as 'r'.
7629  case 'Q':
7630  return C_Memory;
7631  case 'I':
7632  case 'J':
7633  case 'K':
7634  case 'L':
7635  case 'M':
7636  case 'N':
7637  case 'Y':
7638  case 'Z':
7639  return C_Immediate;
7640  case 'z':
7641  case 'S': // A symbolic address
7642  return C_Other;
7643  }
7644  } else if (parsePredicateConstraint(Constraint) !=
7645  PredicateConstraint::Invalid)
7646  return C_RegisterClass;
7647  return TargetLowering::getConstraintType(Constraint);
7648 }
7649 
7650 /// Examine constraint type and operand type and determine a weight value.
7651 /// This object must already have been set up with the operand type
7652 /// and the current alternative constraint selected.
7653 TargetLowering::ConstraintWeight
7654 AArch64TargetLowering::getSingleConstraintMatchWeight(
7655  AsmOperandInfo &info, const char *constraint) const {
7656  ConstraintWeight weight = CW_Invalid;
7657  Value *CallOperandVal = info.CallOperandVal;
7658  // If we don't have a value, we can't do a match,
7659  // but allow it at the lowest weight.
7660  if (!CallOperandVal)
7661  return CW_Default;
7662  Type *type = CallOperandVal->getType();
7663  // Look at the constraint type.
7664  switch (*constraint) {
7665  default:
7666  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
7667  break;
7668  case 'x':
7669  case 'w':
7670  case 'y':
7671  if (type->isFloatingPointTy() || type->isVectorTy())
7672  weight = CW_Register;
7673  break;
7674  case 'z':
7675  weight = CW_Constant;
7676  break;
7677  case 'U':
7678  if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
7679  weight = CW_Register;
7680  break;
7681  }
7682  return weight;
7683 }
7684 
7685 std::pair<unsigned, const TargetRegisterClass *>
7686 AArch64TargetLowering::getRegForInlineAsmConstraint(
7687  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
7688  if (Constraint.size() == 1) {
7689  switch (Constraint[0]) {
7690  case 'r':
7691  if (VT.isScalableVector())
7692  return std::make_pair(0U, nullptr);
7693  if (VT.getFixedSizeInBits() == 64)
7694  return std::make_pair(0U, &AArch64::GPR64commonRegClass);
7695  return std::make_pair(0U, &AArch64::GPR32commonRegClass);
7696  case 'w': {
7697  if (!Subtarget->hasFPARMv8())
7698  break;
7699  if (VT.isScalableVector()) {
7700  if (VT.getVectorElementType() != MVT::i1)
7701  return std::make_pair(0U, &AArch64::ZPRRegClass);
7702  return std::make_pair(0U, nullptr);
7703  }
7704  uint64_t VTSize = VT.getFixedSizeInBits();
7705  if (VTSize == 16)
7706  return std::make_pair(0U, &AArch64::FPR16RegClass);
7707  if (VTSize == 32)
7708  return std::make_pair(0U, &AArch64::FPR32RegClass);
7709  if (VTSize == 64)
7710  return std::make_pair(0U, &AArch64::FPR64RegClass);
7711  if (VTSize == 128)
7712  return std::make_pair(0U, &AArch64::FPR128RegClass);
7713  break;
7714  }
7715  // The instructions that this constraint is designed for can
7716  // only take 128-bit registers so just use that regclass.
7717  case 'x':
7718  if (!Subtarget->hasFPARMv8())
7719  break;
7720  if (VT.isScalableVector())
7721  return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
7722  if (VT.getSizeInBits() == 128)
7723  return std::make_pair(0U, &AArch64::FPR128_loRegClass);
7724  break;
7725  case 'y':
7726  if (!Subtarget->hasFPARMv8())
7727  break;
7728  if (VT.isScalableVector())
7729  return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
7730  break;
7731  }
7732  } else {
7733  PredicateConstraint PC = parsePredicateConstraint(Constraint);
7734  if (PC != PredicateConstraint::Invalid) {
7735  if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
7736  return std::make_pair(0U, nullptr);
7737  bool restricted = (PC == PredicateConstraint::Upl);
7738  return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
7739  : std::make_pair(0U, &AArch64::PPRRegClass);
7740  }
7741  }
7742  if (StringRef("{cc}").equals_lower(Constraint))
7743  return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
7744 
7745  // Use the default implementation in TargetLowering to convert the register
7746  // constraint into a member of a register class.
7747  std::pair<unsigned, const TargetRegisterClass *> Res;
7748  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
7749 
7750  // Not found as a standard register?
7751  if (!Res.second) {
7752  unsigned Size = Constraint.size();
7753  if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
7754  tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
7755  int RegNo;
7756  bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
7757  if (!Failed && RegNo >= 0 && RegNo <= 31) {
7758  // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
7759  // By default we'll emit v0-v31 for this unless there's a modifier where
7760  // we'll emit the correct register as well.
7761  if (VT != MVT::Other && VT.getSizeInBits() == 64) {
7762  Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
7763  Res.second = &AArch64::FPR64RegClass;
7764  } else {
7765  Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
7766  Res.second = &AArch64::FPR128RegClass;
7767  }
7768  }
7769  }
7770  }
7771 
7772  if (Res.second && !Subtarget->hasFPARMv8() &&
7773  !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
7774  !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
7775  return std::make_pair(0U, nullptr);
7776 
7777  return Res;
7778 }
7779 
7780 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
7781 /// vector. If it is invalid, don't add anything to Ops.
7782 void AArch64TargetLowering::LowerAsmOperandForConstraint(
7783  SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
7784  SelectionDAG &DAG) const {
7785  SDValue Result;
7786 
7787  // Currently only support length 1 constraints.
7788  if (Constraint.length() != 1)
7789  return;
7790 
7791  char ConstraintLetter = Constraint[0];
7792  switch (ConstraintLetter) {
7793  default:
7794  break;
7795 
7796  // This set of constraints deals with valid constants for various instructions.
7797  // Validate and return a target constant for them if we can.
7798  case 'z': {
7799  // 'z' maps to xzr or wzr so it needs an input of 0.
7800  if (!isNullConstant(Op))
7801  return;
7802 
7803  if (Op.getValueType() == MVT::i64)
7804  Result = DAG.getRegister(AArch64::XZR, MVT::i64);
7805  else
7806  Result = DAG.getRegister(AArch64::WZR, MVT::i32);
7807  break;
7808  }
7809  case 'S': {
7810  // An absolute symbolic address or label reference.
7811  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
7812  Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
7813  GA->getValueType(0));
7814  } else if (const BlockAddressSDNode *BA =
7815  dyn_cast<BlockAddressSDNode>(Op)) {
7816  Result =
7817  DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
7818  } else if (const ExternalSymbolSDNode *ES =
7819  dyn_cast<ExternalSymbolSDNode>(Op)) {
7820  Result =
7821  DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
7822  } else
7823  return;
7824  break;
7825  }
7826 
7827  case 'I':
7828  case 'J':
7829  case 'K':
7830  case 'L':
7831  case 'M':
7832  case 'N':
7833  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
7834  if (!C)
7835  return;
7836 
7837  // Grab the value and do some validation.
7838  uint64_t CVal = C->getZExtValue();
7839  switch (ConstraintLetter) {
7840  // The I constraint applies only to simple ADD or SUB immediate operands:
7841  // i.e. 0 to 4095 with optional shift by 12
7842  // The J constraint applies only to ADD or SUB immediates that would be
7843  // valid when negated, i.e. if [an add pattern] were to be output as a SUB
7844  // instruction [or vice versa], in other words -1 to -4095 with optional
7845  // left shift by 12.
7846  case 'I':
7847  if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
7848  break;
7849  return;
7850  case 'J': {
7851  uint64_t NVal = -C->getSExtValue();
7852  if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
7853  CVal = C->getSExtValue();
7854  break;
7855  }
7856  return;
7857  }
7858  // The K and L constraints apply *only* to logical immediates, including
7859  // what used to be the MOVI alias for ORR (though the MOVI alias has now
7860  // been removed and MOV should be used). So these constraints have to
7861  // distinguish between bit patterns that are valid 32-bit or 64-bit
7862  // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
7863  // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
7864  // versa.
7865  case 'K':
7866  if (AArch64_AM::isLogicalImmediate(CVal, 32))
7867  break;
7868  return;
7869  case 'L':
7870  if (AArch64_AM::isLogicalImmediate(CVal, 64))
7871  break;
7872  return;
7873  // The M and N constraints are a superset of K and L respectively, for use
7874  // with the MOV (immediate) alias. As well as the logical immediates they
7875  // also match 32 or 64-bit immediates that can be loaded either using a
7876  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
7877  // (M) or 64-bit 0x1234000000000000 (N) etc.
7878  // As a note some of this code is liberally stolen from the asm parser.
7879  case 'M': {
7880  if (!isUInt<32>(CVal))
7881  return;
7882  if (AArch64_AM::isLogicalImmediate(CVal, 32))
7883  break;
7884  if ((CVal & 0xFFFF) == CVal)
7885  break;
7886  if ((CVal & 0xFFFF0000ULL) == CVal)
7887  break;
7888  uint64_t NCVal = ~(uint32_t)CVal;
7889  if ((NCVal & 0xFFFFULL) == NCVal)
7890  break;
7891  if ((NCVal & 0xFFFF0000ULL) == NCVal)
7892  break;
7893  return;
7894  }
7895  case 'N': {
7896  if (AArch64_AM::isLogicalImmediate(CVal, 64))
7897  break;
7898  if ((CVal & 0xFFFFULL) == CVal)
7899  break;
7900  if ((CVal & 0xFFFF0000ULL) == CVal)
7901  break;
7902  if ((CVal & 0xFFFF00000000ULL) == CVal)
7903  break;
7904  if ((CVal & 0xFFFF000000000000ULL) == CVal)
7905  break;
7906  uint64_t NCVal = ~CVal;
7907  if ((NCVal & 0xFFFFULL) == NCVal)
7908  break;
7909  if ((NCVal & 0xFFFF0000ULL) == NCVal)
7910  break;
7911  if ((NCVal & 0xFFFF00000000ULL) == NCVal)
7912  break;
7913  if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
7914  break;
7915  return;
7916  }
7917  default:
7918  return;
7919  }
7920 
7921  // All assembler immediates are 64-bit integers.
7922  Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
7923  break;
7924  }
7925 
7926  if (Result.getNode()) {
7927  Ops.push_back(Result);
7928  return;
7929  }
7930 
7931  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
7932 }
7933 
7934 //===----------------------------------------------------------------------===//
7935 // AArch64 Advanced SIMD Support
7936 //===----------------------------------------------------------------------===//
7937 
7938 /// WidenVector - Given a value in the V64 register class, produce the
7939 /// equivalent value in the V128 register class.
7940 static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
7941  EVT VT = V64Reg.getValueType();
7942  unsigned NarrowSize = VT.getVectorNumElements();
7943  MVT EltTy = VT.getVectorElementType().getSimpleVT();
7944  MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
7945  SDLoc DL(V64Reg);
7946 
7947  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
7948  V64Reg, DAG.getConstant(0, DL, MVT::i32));
7949 }
7950 
7951 /// getExtFactor - Determine the adjustment factor for the position when
7952 /// generating an "extract from vector registers" instruction.
7953 static unsigned getExtFactor(SDValue &V) {
7954  EVT EltType = V.getValueType().getVectorElementType();
7955  return EltType.getSizeInBits() / 8;
7956 }
7957 
7958 /// NarrowVector - Given a value in the V128 register class, produce the
7959 /// equivalent value in the V64 register class.
7960 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
7961  EVT VT = V128Reg.getValueType();
7962  unsigned WideSize = VT.getVectorNumElements();
7963  MVT EltTy = VT.getVectorElementType().getSimpleVT();
7964  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
7965  SDLoc DL(V128Reg);
7966 
7967  return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
7968 }
7969 
7970 // Gather data to see if the operation can be modelled as a
7971 // shuffle in combination with VEXTs.
7972 SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
7973  SelectionDAG &DAG) const {
7974  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7975  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
7976  SDLoc dl(Op);
7977  EVT VT = Op.getValueType();
7978  assert(!VT.isScalableVector() &&
7979  "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
7980  unsigned NumElts = VT.getVectorNumElements();
7981 
7982  struct ShuffleSourceInfo {
7983  SDValue Vec;
7984  unsigned MinElt;
7985  unsigned MaxElt;
7986 
7987  // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
7988  // be compatible with the shuffle we intend to construct. As a result
7989  // ShuffleVec will be some sliding window into the original Vec.
7990  SDValue ShuffleVec;
7991 
7992  // Code should guarantee that element i in Vec starts at element "WindowBase
7993  // + i * WindowScale in ShuffleVec".
7994  int WindowBase;
7995  int WindowScale;
7996 
7997  ShuffleSourceInfo(SDValue Vec)
7998  : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
7999  ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8000 
8001  bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8002  };
8003 
8004  // First gather all vectors used as an immediate source for this BUILD_VECTOR
8005  // node.
8006  SmallVector<ShuffleSourceInfo, 2> Sources;
8007  for (unsigned i = 0; i < NumElts; ++i) {
8008  SDValue V = Op.getOperand(i);
8009  if (V.isUndef())
8010  continue;
8011  else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
8012  !isa<ConstantSDNode>(V.getOperand(1))) {
8013  LLVM_DEBUG(
8014  dbgs() << "Reshuffle failed: "
8015  "a shuffle can only come from building a vector from "
8016  "various elements of other vectors, provided their "
8017  "indices are constant\n");
8018  return SDValue();
8019  }
8020 
8021  // Add this element source to the list if it's not already there.
8022  SDValue SourceVec = V.getOperand(0);
8023  auto Source = find(Sources, SourceVec);
8024  if (Source == Sources.end())
8025  Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8026 
8027  // Update the minimum and maximum lane number seen.
8028  unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8029  Source->MinElt = std::min(Source->MinElt, EltNo);
8030  Source->MaxElt = std::max(Source->MaxElt, EltNo);
8031  }
8032 
8033  if (Sources.size() > 2) {
8034  LLVM_DEBUG(
8035  dbgs() << "Reshuffle failed: currently only do something sane when at "
8036  "most two source vectors are involved\n");
8037  return SDValue();
8038  }
8039 
8040  // Find out the smallest element size among result and two sources, and use
8041  // it as element size to build the shuffle_vector.
8042  EVT SmallestEltTy = VT.getVectorElementType();
8043  for (auto &Source : Sources) {
8044  EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8045  if (SrcEltTy.bitsLT(SmallestEltTy)) {
8046  SmallestEltTy = SrcEltTy;
8047  }
8048  }
8049  unsigned ResMultiplier =
8050  VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8051  uint64_t VTSize = VT.getFixedSizeInBits();
8052  NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
8053  EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8054 
8055  // If the source vector is too wide or too narrow, we may nevertheless be able
8056  // to construct a compatible shuffle either by concatenating it with UNDEF or
8057  // extracting a suitable range of elements.
8058  for (auto &Src : Sources) {
8059  EVT SrcVT = Src.ShuffleVec.getValueType();
8060 
8061  uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8062  if (SrcVTSize == VTSize)
8063  continue;
8064 
8065  // This stage of the search produces a source with the same element type as
8066  // the original, but with a total width matching the BUILD_VECTOR output.
8067  EVT EltVT = SrcVT.getVectorElementType();
8068  unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8069  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8070 
8071  if (SrcVTSize < VTSize) {
8072  assert(2 * SrcVTSize == VTSize);
8073  // We can pad out the smaller vector for free, so if it's part of a
8074  // shuffle...
8075  Src.ShuffleVec =
8076  DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8077  DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8078  continue;
8079  }
8080 
8081  if (SrcVTSize != 2 * VTSize) {
8082  LLVM_DEBUG(
8083  dbgs() << "Reshuffle failed: result vector too small to extract\n");
8084  return SDValue();
8085  }
8086 
8087  if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8088  LLVM_DEBUG(
8089  dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
8090  return SDValue();
8091  }
8092 
8093  if (Src.MinElt >= NumSrcElts) {
8094  // The extraction can just take the second half
8095  Src.ShuffleVec =
8096  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8097  DAG.getConstant(NumSrcElts, dl, MVT::i64));
8098  Src.WindowBase = -NumSrcElts;
8099  } else if (Src.MaxElt < NumSrcElts) {
8100  // The extraction can just take the first half
8101  Src.ShuffleVec =
8102  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8103  DAG.getConstant(0, dl, MVT::i64));
8104  } else {
8105  // An actual VEXT is needed
8106  SDValue VEXTSrc1 =
8107  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8108  DAG.getConstant(0, dl, MVT::i64));
8109  SDValue VEXTSrc2 =
8110  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8111  DAG.getConstant(NumSrcElts, dl, MVT::i64));
8112  unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
8113 
8114  if (!SrcVT.is64BitVector()) {
8115  LLVM_DEBUG(
8116  dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8117  "for SVE vectors.");
8118  return SDValue();
8119  }
8120 
8121  Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
8122  VEXTSrc2,
8123  DAG.getConstant(Imm, dl, MVT::i32));
8124  Src.WindowBase = -Src.MinElt;
8125  }
8126  }
8127 
8128  // Another possible incompatibility occurs from the vector element types. We
8129  // can fix this by bitcasting the source vectors to the same type we intend
8130  // for the shuffle.
8131  for (auto &Src : Sources) {
8132  EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8133  if (SrcEltTy == SmallestEltTy)
8134  continue;
8135  assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8136  Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
8137  Src.WindowScale =
8138  SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8139  Src.WindowBase *= Src.WindowScale;
8140  }
8141 
8142  // Final sanity check before we try to actually produce a shuffle.
8143  LLVM_DEBUG(for (auto Src
8144  : Sources)
8145  assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8146 
8147  // The stars all align, our next step is to produce the mask for the shuffle.
8148  SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8149  int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8150  for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8151  SDValue Entry = Op.getOperand(i);
8152  if (Entry.isUndef())
8153  continue;
8154 
8155  auto Src = find(Sources, Entry.getOperand(0));
8156  int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8157 
8158  // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8159  // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8160  // segment.
8161  EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8162  int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8163  VT.getScalarSizeInBits());
8164  int LanesDefined = BitsDefined / BitsPerShuffleLane;
8165 
8166  // This source is expected to fill ResMultiplier lanes of the final shuffle,
8167  // starting at the appropriate offset.
8168  int *LaneMask = &Mask[i * ResMultiplier];
8169 
8170  int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8171  ExtractBase += NumElts * (Src - Sources.begin());
8172  for (int j = 0; j < LanesDefined; ++j)
8173  LaneMask[j] = ExtractBase + j;
8174  }
8175 
8176  // Final check before we try to produce nonsense...
8177  if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
8178  LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
8179  return SDValue();
8180  }
8181 
8182  SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8183  for (unsigned i = 0; i < Sources.size(); ++i)
8184  ShuffleOps[i] = Sources[i].ShuffleVec;
8185 
8186  SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8187  ShuffleOps[1], Mask);
8188  SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
8189 
8190  LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
8191  dbgs() << "Reshuffle, creating node: "; V.dump(););
8192 
8193  return V;
8194 }
8195 
8196 // check if an EXT instruction can handle the shuffle mask when the
8197 // vector sources of the shuffle are the same.
8198 static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
8199  unsigned NumElts = VT.getVectorNumElements();
8200 
8201  // Assume that the first shuffle index is not UNDEF. Fail if it is.
8202  if (M[0] < 0)
8203  return false;
8204 
8205  Imm = M[0];
8206 
8207  // If this is a VEXT shuffle, the immediate value is the index of the first
8208  // element. The other shuffle indices must be the successive elements after
8209  // the first one.
8210  unsigned ExpectedElt = Imm;
8211  for (unsigned i = 1; i < NumElts; ++i) {
8212  // Increment the expected index. If it wraps around, just follow it
8213  // back to index zero and keep going.
8214  ++ExpectedElt;
8215  if (ExpectedElt == NumElts)
8216  ExpectedElt = 0;
8217 
8218  if (M[i] < 0)
8219  continue; // ignore UNDEF indices
8220  if (ExpectedElt != static_cast<unsigned>(M[i]))
8221  return false;
8222  }
8223 
8224  return true;
8225 }
8226 
8227 /// Check if a vector shuffle corresponds to a DUP instruction with a larger
8228 /// element width than the vector lane type. If that is the case, the function
8229 /// returns true and writes the value of the DUP instruction lane operand into
8230 /// DupLaneOp.
8231 static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
8232  unsigned &DupLaneOp) {
8233  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8234  "Only possible block sizes for wide DUP are: 16, 32, 64");
8235 
8236  if (BlockSize <= VT.getScalarSizeInBits())
8237  return false;
8238  if (BlockSize % VT.getScalarSizeInBits() != 0)
8239  return false;
8240  if (VT.getSizeInBits() % BlockSize != 0)
8241  return false;
8242 
8243  size_t SingleVecNumElements = VT.getVectorNumElements();
8244  size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
8245  size_t NumBlocks = VT.getSizeInBits() / BlockSize;
8246 
8247  // We are looking for masks like
8248  // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
8249  // might be replaced by 'undefined'. BlockIndices will eventually contain
8250  // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
8251  // for the above examples)
8252  SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
8253  for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
8254  for (size_t I = 0; I < NumEltsPerBlock; I++) {
8255  int Elt = M[BlockIndex * NumEltsPerBlock + I];
8256  if (Elt < 0)
8257  continue;
8258  // For now we don't support shuffles that use the second operand
8259  if ((unsigned)Elt >= SingleVecNumElements)
8260  return false;
8261  if (BlockElts[I] < 0)
8262  BlockElts[I] = Elt;
8263  else if (BlockElts[I] != Elt)
8264  return false;
8265  }
8266 
8267  // We found a candidate block (possibly with some undefs). It must be a
8268  // sequence of consecutive integers starting with a value divisible by
8269  // NumEltsPerBlock with some values possibly replaced by undef-s.
8270 
8271  // Find first non-undef element
8272  auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
8273  assert(FirstRealEltIter != BlockElts.end() &&
8274  "Shuffle with all-undefs must have been caught by previous cases, "
8275  "e.g. isSplat()");
8276  if (FirstRealEltIter == BlockElts.end()) {
8277  DupLaneOp = 0;
8278  return true;
8279  }
8280 
8281  // Index of FirstRealElt in BlockElts
8282  size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
8283 
8284  if ((unsigned)*FirstRealEltIter < FirstRealIndex)
8285  return false;
8286  // BlockElts[0] must have the following value if it isn't undef:
8287  size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
8288 
8289  // Check the first element
8290  if (Elt0 % NumEltsPerBlock != 0)
8291  return false;
8292  // Check that the sequence indeed consists of consecutive integers (modulo
8293  // undefs)
8294  for (size_t I = 0; I < NumEltsPerBlock; I++)
8295  if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
8296  return false;
8297 
8298  DupLaneOp = Elt0 / NumEltsPerBlock;
8299  return true;
8300 }
8301 
8302 // check if an EXT instruction can handle the shuffle mask when the
8303 // vector sources of the shuffle are different.
8304 static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
8305  unsigned &Imm) {
8306  // Look for the first non-undef element.
8307  const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
8308 
8309  // Benefit from APInt to handle overflow when calculating the expected element.
8310  unsigned NumElts = VT.getVectorNumElements();
8311  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
8312  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
8313  // The following shuffle indices must be the successive elements after the
8314  // first real element.
8315  const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
8316  [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
8317  if (FirstWrongElt != M.end())
8318  return false;
8319 
8320  // The index of an EXT is the first element if it is not UNDEF.
8321  // Watch out for the beginning UNDEFs. The EXT index should be the expected
8322  // value of the first element. E.g.
8323  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
8324  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
8325  // ExpectedElt is the last mask index plus 1.
8326  Imm = ExpectedElt.getZExtValue();
8327 
8328  // There are two different cases that require reversing the input vectors.
8329  // For example, for vector <4 x i32> we have the following cases,
8330  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
8331  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
8332  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
8333  // to reverse two input vectors.
8334  if (Imm < NumElts)
8335  ReverseEXT = true;
8336  else
8337  Imm -= NumElts;
8338 
8339  return true;
8340 }
8341 
8342 /// isREVMask - Check if a vector shuffle corresponds to a REV
8343 /// instruction with the specified blocksize. (The order of the elements
8344 /// within each block of the vector is reversed.)
8345 static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
8346  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8347  "Only possible block sizes for REV are: 16, 32, 64");
8348 
8349  unsigned EltSz = VT.getScalarSizeInBits();
8350  if (EltSz == 64)
8351  return false;
8352 
8353  unsigned NumElts = VT.getVectorNumElements();
8354  unsigned BlockElts = M[0] + 1;
8355  // If the first shuffle index is UNDEF, be optimistic.
8356  if (M[0] < 0)
8357  BlockElts = BlockSize / EltSz;
8358 
8359  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
8360  return false;
8361 
8362  for (unsigned i = 0; i < NumElts; ++i) {
8363  if (M[i] < 0)
8364  continue; // ignore UNDEF indices
8365  if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
8366  return false;
8367  }
8368 
8369  return true;
8370 }
8371 
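/// isZIPMask - Check if a vector shuffle corresponds to a ZIP instruction;
/// WhichResult is set to 0 when the mask matches ZIP1 (interleave of the low
/// halves) and to 1 for ZIP2 (interleave of the high halves).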
8372 static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8373  unsigned NumElts = VT.getVectorNumElements();
8374  if (NumElts % 2 != 0)
8375  return false;
8376  WhichResult = (M[0] == 0 ? 0 : 1);
8377  unsigned Idx = WhichResult * NumElts / 2;
8378  for (unsigned i = 0; i != NumElts; i += 2) {
8379  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
8380  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
8381  return false;
8382  Idx += 1;
8383  }
8384 
8385  return true;
8386 }
8387 
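/// isUZPMask - Check if a vector shuffle corresponds to a UZP instruction;
/// WhichResult is set to 0 for UZP1 (even-numbered elements) and to 1 for
/// UZP2 (odd-numbered elements).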
8388 static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8389  unsigned NumElts = VT.getVectorNumElements();
8390  WhichResult = (M[0] == 0 ? 0 : 1);
8391  for (unsigned i = 0; i != NumElts; ++i) {
8392  if (M[i] < 0)
8393  continue; // ignore UNDEF indices
8394  if ((unsigned)M[i] != 2 * i + WhichResult)
8395  return false;
8396  }
8397 
8398  return true;
8399 }
8400 
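/// isTRNMask - Check if a vector shuffle corresponds to a TRN instruction;
/// WhichResult is set to 0 for TRN1 and to 1 for TRN2.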
8401 static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8402  unsigned NumElts = VT.getVectorNumElements();
8403  if (NumElts % 2 != 0)
8404  return false;
8405  WhichResult = (M[0] == 0 ? 0 : 1);
8406  for (unsigned i = 0; i < NumElts; i += 2) {
8407  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
8408  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
8409  return false;
8410  }
8411  return true;
8412 }
8413 
8414 /// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
8415 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
8416 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
8417 static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8418  unsigned NumElts = VT.getVectorNumElements();
8419  if (NumElts % 2 != 0)
8420  return false;
8421  WhichResult = (M[0] == 0 ? 0 : 1);
8422  unsigned Idx = WhichResult * NumElts / 2;
8423  for (unsigned i = 0; i != NumElts; i += 2) {
8424  if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
8425  (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
8426  return false;
8427  Idx += 1;
8428  }
8429 
8430  return true;
8431 }
8432 
8433 /// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
8434 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
8435 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
8436 static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8437  unsigned Half = VT.getVectorNumElements() / 2;
8438  WhichResult = (M[0] == 0 ? 0 : 1);
8439  for (unsigned j = 0; j != 2; ++j) {
8440  unsigned Idx = WhichResult;
8441  for (unsigned i = 0; i != Half; ++i) {
8442  int MIdx = M[i + j * Half];
8443  if (MIdx >= 0 && (unsigned)MIdx != Idx)
8444  return false;
8445  Idx += 2;
8446  }
8447  }
8448 
8449  return true;
8450 }
8451 
8452 /// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
8453 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
8454 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
8455 static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
8456  unsigned NumElts = VT.getVectorNumElements();
8457  if (NumElts % 2 != 0)
8458  return false;
8459  WhichResult = (M[0] == 0 ? 0 : 1);
8460  for (unsigned i = 0; i < NumElts; i += 2) {
8461  if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
8462  (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
8463  return false;
8464  }
8465  return true;
8466 }
8467 
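/// isINSMask - Check whether the shuffle takes every lane from one input
/// except for a single mismatching lane, which an INS (insert element) can
/// copy from the other input. DstIsLeft reports whether the left operand
/// supplies the bulk of the result and Anomaly the index of the odd lane.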
8468 static bool isINSMask(ArrayRef<int> M, int NumInputElements,
8469  bool &DstIsLeft, int &Anomaly) {
8470  if (M.size() != static_cast<size_t>(NumInputElements))
8471  return false;
8472 
8473  int NumLHSMatch = 0, NumRHSMatch = 0;
8474  int LastLHSMismatch = -1, LastRHSMismatch = -1;
8475 
8476  for (int i = 0; i < NumInputElements; ++i) {
8477  if (M[i] == -1) {
8478  ++NumLHSMatch;
8479  ++NumRHSMatch;
8480  continue;
8481  }
8482 
8483  if (M[i] == i)
8484  ++NumLHSMatch;
8485  else
8486  LastLHSMismatch = i;
8487 
8488  if (M[i] == i + NumInputElements)
8489  ++NumRHSMatch;
8490  else
8491  LastRHSMismatch = i;
8492  }
8493 
8494  if (NumLHSMatch == NumInputElements - 1) {
8495  DstIsLeft = true;
8496  Anomaly = LastLHSMismatch;
8497  return true;
8498  } else if (NumRHSMatch == NumInputElements - 1) {
8499  DstIsLeft = false;
8500  Anomaly = LastRHSMismatch;
8501  return true;
8502  }
8503 
8504  return false;
8505 }
8506 
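/// isConcatMask - Check whether a 128-bit shuffle mask selects the first
/// NumElts/2 elements in order followed by another run of NumElts/2
/// consecutive elements, so the shuffle can be lowered as a CONCAT_VECTORS
/// of two extracted halves (see tryFormConcatFromShuffle below).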
8507 static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
8508  if (VT.getSizeInBits() != 128)
8509  return false;
8510 
8511  unsigned NumElts = VT.getVectorNumElements();
8512 
8513  for (int I = 0, E = NumElts / 2; I != E; I++) {
8514  if (Mask[I] != I)
8515  return false;
8516  }
8517 
8518  int Offset = NumElts / 2;
8519  for (int I = NumElts / 2, E = NumElts; I != E; I++) {
8520  if (Mask[I] != I + SplitLHS * Offset)
8521  return false;
8522  }
8523 
8524  return true;
8525 }
8526 
8527 static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
8528  SDLoc DL(Op);
8529  EVT VT = Op.getValueType();
8530  SDValue V0 = Op.getOperand(0);
8531  SDValue V1 = Op.getOperand(1);
8532  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
8533 
8534  if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
8535  VT.getVectorElementType() != V1.getValueType().getVectorElementType())
8536  return SDValue();
8537 
8538  bool SplitV0 = V0.getValueSizeInBits() == 128;
8539 
8540  if (!isConcatMask(Mask, VT, SplitV0))
8541  return SDValue();
8542 
8543  EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
8544  if (SplitV0) {
8545  V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
8546  DAG.getConstant(0, DL, MVT::i64));
8547  }
8548  if (V1.getValueSizeInBits() == 128) {
8549  V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
8550  DAG.getConstant(0, DL, MVT::i64));
8551  }
8552  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
8553 }
8554 
8555 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8556 /// the specified operations to build the shuffle.
8557 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8558  SDValue RHS, SelectionDAG &DAG,
8559  const SDLoc &dl) {
8560  unsigned OpNum = (PFEntry >> 26) & 0x0F;
8561  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
8562  unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
8563 
8564  enum {
8565  OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8566  OP_VREV,
8567  OP_VDUP0,
8568  OP_VDUP1,
8569  OP_VDUP2,
8570  OP_VDUP3,
8571  OP_VEXT1,
8572  OP_VEXT2,
8573  OP_VEXT3,
8574  OP_VUZPL, // VUZP, left result
8575  OP_VUZPR, // VUZP, right result
8576  OP_VZIPL, // VZIP, left result
8577  OP_VZIPR, // VZIP, right result
8578  OP_VTRNL, // VTRN, left result
8579  OP_VTRNR // VTRN, right result
8580  };
8581 
8582  if (OpNum == OP_COPY) {
8583  if (LHSID == (1 * 9 + 2) * 9 + 3)
8584  return LHS;
8585  assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
8586  return RHS;
8587  }
8588 
8589  SDValue OpLHS, OpRHS;
8590  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8591  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8592  EVT VT = OpLHS.getValueType();
8593 
8594  switch (OpNum) {
8595  default:
8596  llvm_unreachable("Unknown shuffle opcode!");
8597  case OP_VREV:
8598  // VREV divides the vector in half and swaps within the half.
8599  if (VT.getVectorElementType() == MVT::i32 ||
8600  VT.getVectorElementType() == MVT::f32)
8601  return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
8602  // vrev <4 x i16> -> REV32
8603  if (VT.getVectorElementType() == MVT::i16 ||
8604  VT.getVectorElementType() == MVT::f16 ||
8605  VT.getVectorElementType() == MVT::bf16)
8606  return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
8607  // vrev <4 x i8> -> REV16
8608  assert(VT.getVectorElementType() == MVT::i8);
8609  return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
8610  case OP_VDUP0:
8611  case OP_VDUP1:
8612  case OP_VDUP2:
8613  case OP_VDUP3: {
8614  EVT EltTy = VT.getVectorElementType();
8615  unsigned Opcode;
8616  if (EltTy == MVT::i8)
8617  Opcode = AArch64ISD::DUPLANE8;
8618  else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
8619  Opcode = AArch64ISD::DUPLANE16;
8620  else if (EltTy == MVT::i32 || EltTy == MVT::f32)
8621  Opcode = AArch64ISD::DUPLANE32;
8622  else if (EltTy == MVT::i64 || EltTy == MVT::f64)
8623  Opcode = AArch64ISD::DUPLANE64;
8624  else
8625  llvm_unreachable("Invalid vector element type?");
8626 
8627  if (VT.getSizeInBits() == 64)
8628  OpLHS = WidenVector(OpLHS, DAG);
8629  SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
8630  return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
8631  }
8632  case OP_VEXT1:
8633  case OP_VEXT2:
8634  case OP_VEXT3: {
8635  unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
8636  return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
8637  DAG.getConstant(Imm, dl, MVT::i32));
8638  }
8639  case OP_VUZPL:
8640  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
8641  OpRHS);
8642  case OP_VUZPR:
8643  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
8644  OpRHS);
8645  case OP_VZIPL:
8646  return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
8647  OpRHS);
8648  case OP_VZIPR:
8649  return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
8650  OpRHS);
8651  case OP_VTRNL:
8652  return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
8653  OpRHS);
8654  case OP_VTRNR:
8655  return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
8656  OpRHS);
8657  }
8658 }
8659 
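/// GenerateTBL - Lower an arbitrary vector shuffle by expanding the mask to
/// byte indices and emitting a NEON TBL1/TBL2 table-lookup intrinsic.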
8660 static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
8661  SelectionDAG &DAG) {
8662  // Check to see if we can use the TBL instruction.
8663  SDValue V1 = Op.getOperand(0);
8664  SDValue V2 = Op.getOperand(1);
8665  SDLoc DL(Op);
8666 
8667  EVT EltVT = Op.getValueType().getVectorElementType();
8668  unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
8669 
8670  SmallVector<SDValue, 8> TBLMask;
8671  for (int Val : ShuffleMask) {
8672  for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
8673  unsigned Offset = Byte + Val * BytesPerElt;
8674  TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
8675  }
8676  }
8677 
8678  MVT IndexVT = MVT::v8i8;
8679  unsigned IndexLen = 8;
8680  if (Op.getValueSizeInBits() == 128) {
8681  IndexVT = MVT::v16i8;
8682  IndexLen = 16;
8683  }
8684 
8685  SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
8686  SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
8687 
8688  SDValue Shuffle;
8689  if (V2.getNode()->isUndef()) {
8690  if (IndexLen == 8)
8691  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
8692  Shuffle = DAG.getNode(
8693  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
8694  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
8695  DAG.getBuildVector(IndexVT, DL,
8696  makeArrayRef(TBLMask.data(), IndexLen)));
8697  } else {
8698  if (IndexLen == 8) {
8699  V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
8700  Shuffle = DAG.getNode(
8701  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
8702  DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
8703  DAG.getBuildVector(IndexVT, DL,
8704  makeArrayRef(TBLMask.data(), IndexLen)));
8705  } else {
8706  // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
8707  // cannot currently represent the register constraints on the input
8708  // table registers.
8709  // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
8710  // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
8711  // IndexLen));
8712  Shuffle = DAG.getNode(
8713  ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
8714  DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
8715  V2Cst, DAG.getBuildVector(IndexVT, DL,
8716  makeArrayRef(TBLMask.data(), IndexLen)));
8717  }
8718  }
8719  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
8720 }
8721 
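/// getDUPLANEOp - Return the AArch64ISD::DUPLANE* opcode matching the given
/// vector element type.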
8722 static unsigned getDUPLANEOp(EVT EltType) {
8723  if (EltType == MVT::i8)
8724  return AArch64ISD::DUPLANE8;
8725  if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
8726  return AArch64ISD::DUPLANE16;
8727  if (EltType == MVT::i32 || EltType == MVT::f32)
8728  return AArch64ISD::DUPLANE32;
8729  if (EltType == MVT::i64 || EltType == MVT::f64)
8730  return AArch64ISD::DUPLANE64;
8731 
8732  llvm_unreachable("Invalid vector element type?");
8733 }
8734 
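/// constructDup - Build a DUPLANE node duplicating lane \p Lane of \p V,
/// looking through bitcasts, EXTRACT_SUBVECTOR and CONCAT_VECTORS so that
/// the DUP can index directly into the wider source vector, and widening
/// 64-bit operands to 128 bits as the DUPLANE* nodes require.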
8735 static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
8736  unsigned Opcode, SelectionDAG &DAG) {
8737  // Try to eliminate a bitcasted extract subvector before a DUPLANE.
8738  auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
8739  // Match: dup (bitcast (extract_subv X, C)), LaneC
8740  if (BitCast.getOpcode() != ISD::BITCAST ||
8741  BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
8742  return false;
8743 
8744  // The extract index must align in the destination type. That may not
8745  // happen if the bitcast is from narrow to wide type.
8746  SDValue Extract = BitCast.getOperand(0);
8747  unsigned ExtIdx = Extract.getConstantOperandVal(1);
8748  unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
8749  unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
8750  unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
8751  if (ExtIdxInBits % CastedEltBitWidth != 0)
8752  return false;
8753 
8754  // Update the lane value by offsetting with the scaled extract index.
8755  LaneC += ExtIdxInBits / CastedEltBitWidth;
8756 
8757  // Determine the casted vector type of the wide vector input.
8758  // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
8759  // Examples:
8760  // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
8761  // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
8762  unsigned SrcVecNumElts =
8763  Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
8764  CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
8765  SrcVecNumElts);
8766  return true;
8767  };
8768  MVT CastVT;
8769  if (getScaledOffsetDup(V, Lane, CastVT)) {
8770  V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
8771  } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
8772  // The lane is incremented by the index of the extract.
8773  // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
8774  Lane += V.getConstantOperandVal(1);
8775  V = V.getOperand(0);
8776  } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
8777  // The lane is decremented if we are splatting from the 2nd operand.
8778  // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
8779  unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
8780  Lane -= Idx * VT.getVectorNumElements() / 2;
8781  V = WidenVector(V.getOperand(Idx), DAG);
8782  } else if (VT.getSizeInBits() == 64) {
8783  // Widen the operand to 128-bit register with undef.
8784  V = WidenVector(V, DAG);
8785  }
8786  return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
8787 }
8788 
8789 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
8790  SelectionDAG &DAG) const {
8791  SDLoc dl(Op);
8792  EVT VT = Op.getValueType();
8793 
8794  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8795 
8796  // Convert shuffles that are directly supported on NEON to target-specific
8797  // DAG nodes, instead of keeping them as shuffles and matching them again
8798  // during code selection. This is more efficient and avoids the possibility
8799  // of inconsistencies between legalization and selection.
8800  ArrayRef<int> ShuffleMask = SVN->getMask();
8801 
8802  SDValue V1 = Op.getOperand(0);
8803  SDValue V2 = Op.getOperand(1);
8804 
8805  if (SVN->isSplat()) {
8806  int Lane = SVN->getSplatIndex();
8807  // If this is undef splat, generate it via "just" vdup, if possible.
8808  if (Lane == -1)
8809  Lane = 0;
8810 
8811  if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
8812  return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
8813  V1.getOperand(0));
8814  // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
8815  // constant. If so, we can just reference the lane's definition directly.
8816  if (V1.getOpcode() == ISD::BUILD_VECTOR &&
8817  !isa<ConstantSDNode>(V1.getOperand(Lane)))
8818  return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
8819 
8820  // Otherwise, duplicate from the lane of the input vector.
8821  unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
8822  return constructDup(V1, Lane, dl, VT, Opcode, DAG);
8823  }
8824 
8825  // Check if the mask matches a DUP for a wider element
8826  for (unsigned LaneSize : {64U, 32U, 16U}) {
8827  unsigned Lane = 0;
8828  if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
8829  unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
 8830  : LaneSize == 32 ? AArch64ISD::DUPLANE32
 8831  : AArch64ISD::DUPLANE16;
 8832  // Cast V1 to an integer vector with required lane size
8833  MVT NewEltTy = MVT::getIntegerVT(LaneSize);
8834  unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
8835  MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
8836  V1 = DAG.getBitcast(NewVecTy, V1);
 8837  // Construct the DUP instruction
8838  V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
8839  // Cast back to the original type
8840  return DAG.getBitcast(VT, V1);
8841  }
8842  }
8843 
8844  if (isREVMask(ShuffleMask, VT, 64))
8845  return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
8846  if (isREVMask(ShuffleMask, VT, 32))
8847  return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
8848  if (isREVMask(ShuffleMask, VT, 16))
8849  return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
8850 
8851  bool ReverseEXT = false;
8852  unsigned Imm;
8853  if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
8854  if (ReverseEXT)
8855  std::swap(V1, V2);
8856  Imm *= getExtFactor(V1);
8857  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
8858  DAG.getConstant(Imm, dl, MVT::i32));
8859  } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
8860  Imm *= getExtFactor(V1);
8861  return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
8862  DAG.getConstant(Imm, dl, MVT::i32));
8863  }
8864 
8865  unsigned WhichResult;
8866  if (isZIPMask(ShuffleMask, VT, WhichResult)) {
8867  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
8868  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
8869  }
8870  if (isUZPMask(ShuffleMask, VT, WhichResult)) {
8871  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
8872  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
8873  }
8874  if (isTRNMask(ShuffleMask, VT, WhichResult)) {
8875  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
8876  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
8877  }
8878 
8879  if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
8880  unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
8881  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
8882  }
8883  if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
8884  unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
8885  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
8886  }
8887  if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
8888  unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
8889  return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
8890  }
8891 
 8892  if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
 8893  return Concat;
8894 
8895  bool DstIsLeft;
8896  int Anomaly;
8897  int NumInputElements = V1.getValueType().getVectorNumElements();
8898  if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
8899  SDValue DstVec = DstIsLeft ? V1 : V2;
8900  SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
8901 
8902  SDValue SrcVec = V1;
8903  int SrcLane = ShuffleMask[Anomaly];
8904  if (SrcLane >= NumInputElements) {
8905  SrcVec = V2;
8906  SrcLane -= VT.getVectorNumElements();
8907  }
8908  SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
8909 
8910  EVT ScalarVT = VT.getVectorElementType();
8911 
8912  if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
8913  ScalarVT = MVT::i32;
8914 
8915  return DAG.getNode(
8916  ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
8917  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
8918  DstLaneV);
8919  }
8920 
8921  // If the shuffle is not directly supported and it has 4 elements, use
8922  // the PerfectShuffle-generated table to synthesize it from other shuffles.
8923  unsigned NumElts = VT.getVectorNumElements();
8924  if (NumElts == 4) {
8925  unsigned PFIndexes[4];
8926  for (unsigned i = 0; i != 4; ++i) {
8927  if (ShuffleMask[i] < 0)
8928  PFIndexes[i] = 8;
8929  else
8930  PFIndexes[i] = ShuffleMask[i];
8931  }
8932 
8933  // Compute the index in the perfect shuffle table.
8934  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
8935  PFIndexes[2] * 9 + PFIndexes[3];
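 // Worked example (illustrative): a mask of <1,1,3,3> with no undef lanes (an
 // undef lane would contribute the digit 8) gives the base-9 index
 // 1*729 + 1*81 + 3*9 + 3 == 840; the top two bits of the table entry fetched
 // below encode the cost that is compared against 4.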
8936  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8937  unsigned Cost = (PFEntry >> 30);
8938 
8939  if (Cost <= 4)
8940  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8941  }
8942 
8943  return GenerateTBL(Op, ShuffleMask, DAG);
8944 }
8945 
8946 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
8947  SelectionDAG &DAG) const {
8948  SDLoc dl(Op);
8949  EVT VT = Op.getValueType();
8950  EVT ElemVT = VT.getScalarType();
8951  SDValue SplatVal = Op.getOperand(0);
8952 
8953  if (useSVEForFixedLengthVectorVT(VT))
8954  return LowerToScalableOp(Op, DAG);
8955 
8956  // Extend input splat value where needed to fit into a GPR (32b or 64b only)
8957  // FPRs don't have this restriction.
8958  switch (ElemVT.getSimpleVT().SimpleTy) {
8959  case MVT::i1: {
8960  // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
8961  // lowering code.
8962  if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
8963  if (ConstVal->isOne())
8964  return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
8965  // TODO: Add special case for constant false
8966  }
8967  // The general case of i1. There isn't any natural way to do this,
8968  // so we use some trickery with whilelo.
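 // Roughly: the splat value is sign-extended from i1, so a true input becomes
 // all-ones (the maximum unsigned i64) and whilelo(0, all-ones) yields an
 // all-true predicate, while a false input gives whilelo(0, 0), i.e. all-false.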
8969  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
8970  SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
8971  DAG.getValueType(MVT::i1));
8972  SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
8973  MVT::i64);
8974  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
8975  DAG.getConstant(0, dl, MVT::i64), SplatVal);
8976  }
8977  case MVT::i8:
8978  case MVT::i16:
8979  case MVT::i32:
8980  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
8981  break;
8982  case MVT::i64:
8983  SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
8984  break;
8985  case MVT::f16:
8986  case MVT::bf16:
8987  case MVT::f32:
8988  case MVT::f64:
8989  // Fine as is
8990  break;
8991  default:
8992  report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
8993  }
8994 
8995  return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
8996 }
8997 
8998 SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
8999  SelectionDAG &DAG) const {
9000  SDLoc DL(Op);
9001 
9002  EVT VT = Op.getValueType();
9003  if (!isTypeLegal(VT) || !VT.isScalableVector())
9004  return SDValue();
9005 
9006  // Current lowering only supports the SVE-ACLE types.
 9007  if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
 9008  return SDValue();
9009 
 9010  // The DUPQ operation is independent of element type so normalise to i64s.
9011  SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
9012  SDValue Idx128 = Op.getOperand(2);
9013 
9014  // DUPQ can be used when idx is in range.
9015  auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9016  if (CIdx && (CIdx->getZExtValue() <= 3)) {
9017  SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
9018  SDNode *DUPQ =
9019  DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
9020  return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
9021  }
9022 
9023  // The ACLE says this must produce the same result as:
9024  // svtbl(data, svadd_x(svptrue_b64(),
9025  // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
9026  // index * 2))
9027  SDValue One = DAG.getConstant(1, DL, MVT::i64);
9028  SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9029 
9030  // create the vector 0,1,0,1,...
9031  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
 9032  SDValue SV = DAG.getNode(AArch64ISD::INDEX_VECTOR,
 9033  DL, MVT::nxv2i64, Zero, One);
9034  SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9035 
9036  // create the vector idx64,idx64+1,idx64,idx64+1,...
9037  SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
9038  SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
9039  SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
9040 
9041  // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
9042  SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
9043  return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
9044 }
9045 
9046 
9047 static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
9048  APInt &UndefBits) {
9049  EVT VT = BVN->getValueType(0);
9050  APInt SplatBits, SplatUndef;
9051  unsigned SplatBitSize;
9052  bool HasAnyUndefs;
9053  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9054  unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
9055 
9056  for (unsigned i = 0; i < NumSplats; ++i) {
9057  CnstBits <<= SplatBitSize;
9058  UndefBits <<= SplatBitSize;
9059  CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
9060  UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
9061  }
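 // Illustrative example: for a v4i32 build_vector splatting 0x12345678,
 // SplatBitSize is 32 and the loop above replicates that 32-bit pattern into
 // all four chunks of the 128-bit CnstBits (and likewise for UndefBits).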
9062 
9063  return true;
9064  }
9065 
9066  return false;
9067 }
9068 
9069 // Try 64-bit splatted SIMD immediate.
9070 static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9071  const APInt &Bits) {
9072  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9073  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9074  EVT VT = Op.getValueType();
9075  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
 9076 
 9077  if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
 9078  Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
 9079 
9080  SDLoc dl(Op);
9081  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9082  DAG.getConstant(Value, dl, MVT::i32));
9083  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9084  }
9085  }
9086 
9087  return SDValue();
9088 }
9089 
9090 // Try 32-bit splatted SIMD immediate.
9091 static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9092  const APInt &Bits,
9093  const SDValue *LHS = nullptr) {
9094  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9095  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9096  EVT VT = Op.getValueType();
9097  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9098  bool isAdvSIMDModImm = false;
9099  uint64_t Shift;
9100 
 9101  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
 9102  Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
 9103  Shift = 0;
 9104  }
 9105  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
 9106  Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
 9107  Shift = 8;
 9108  }
 9109  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
 9110  Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
 9111  Shift = 16;
 9112  }
 9113  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
 9114  Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
 9115  Shift = 24;
 9116  }
9117 
9118  if (isAdvSIMDModImm) {
9119  SDLoc dl(Op);
9120  SDValue Mov;
9121 
9122  if (LHS)
9123  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9124  DAG.getConstant(Value, dl, MVT::i32),
9125  DAG.getConstant(Shift, dl, MVT::i32));
9126  else
9127  Mov = DAG.getNode(NewOp, dl, MovTy,
9128  DAG.getConstant(Value, dl, MVT::i32),
9129  DAG.getConstant(Shift, dl, MVT::i32));
9130 
9131  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9132  }
9133  }
9134 
9135  return SDValue();
9136 }
9137 
9138 // Try 16-bit splatted SIMD immediate.
9139 static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9140  const APInt &Bits,
9141  const SDValue *LHS = nullptr) {
9142  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9143  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9144  EVT VT = Op.getValueType();
9145  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
9146  bool isAdvSIMDModImm = false;
9147  uint64_t Shift;
9148 
 9149  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
 9150  Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
 9151  Shift = 0;
 9152  }
 9153  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
 9154  Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
 9155  Shift = 8;
 9156  }
9157 
9158  if (isAdvSIMDModImm) {
9159  SDLoc dl(Op);
9160  SDValue Mov;
9161 
9162  if (LHS)
9163  Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9164  DAG.getConstant(Value, dl, MVT::i32),
9165  DAG.getConstant(Shift, dl, MVT::i32));
9166  else
9167  Mov = DAG.getNode(NewOp, dl, MovTy,
9168  DAG.getConstant(Value, dl, MVT::i32),
9169  DAG.getConstant(Shift, dl, MVT::i32));
9170 
9171  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9172  }
9173  }
9174 
9175  return SDValue();
9176 }
9177 
9178 // Try 32-bit splatted SIMD immediate with shifted ones.
9179 static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
9180  SelectionDAG &DAG, const APInt &Bits) {
9181  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9182  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9183  EVT VT = Op.getValueType();
9184  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9185  bool isAdvSIMDModImm = false;
9186  uint64_t Shift;
9187 
 9188  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
 9189  Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
 9190  Shift = 264;
 9191  }
 9192  else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
 9193  Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
 9194  Shift = 272;
 9195  }
9196 
9197  if (isAdvSIMDModImm) {
9198  SDLoc dl(Op);
9199  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9200  DAG.getConstant(Value, dl, MVT::i32),
9201  DAG.getConstant(Shift, dl, MVT::i32));
9202  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9203  }
9204  }
9205 
9206  return SDValue();
9207 }
9208 
9209 // Try 8-bit splatted SIMD immediate.
9210 static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9211  const APInt &Bits) {
9212  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9213  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9214  EVT VT = Op.getValueType();
9215  MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
 9216 
 9217  if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
 9218  Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
 9219 
9220  SDLoc dl(Op);
9221  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9222  DAG.getConstant(Value, dl, MVT::i32));
9223  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9224  }
9225  }
9226 
9227  return SDValue();
9228 }
9229 
9230 // Try FP splatted SIMD immediate.
9231 static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9232  const APInt &Bits) {
9233  if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9234  uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9235  EVT VT = Op.getValueType();
9236  bool isWide = (VT.getSizeInBits() == 128);
9237  MVT MovTy;
9238  bool isAdvSIMDModImm = false;
9239 
 9240  if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
 9241  Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
 9242  MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
 9243  }
 9244  else if (isWide &&
 9245  (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
 9246  Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
 9247  MovTy = MVT::v2f64;
9248  }
9249 
9250  if (isAdvSIMDModImm) {
9251  SDLoc dl(Op);
9252  SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9253  DAG.getConstant(Value, dl, MVT::i32));
9254  return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9255  }
9256  }
9257 
9258  return SDValue();
9259 }
9260 
9261 // Specialized code to quickly find if PotentialBVec is a BuildVector that
9262 // consists of only the same constant int value, returned in reference arg
9263 // ConstVal
9264 static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
9265  uint64_t &ConstVal) {
9266  BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
9267  if (!Bvec)
9268  return false;
9269  ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
9270  if (!FirstElt)
9271  return false;
9272  EVT VT = Bvec->getValueType(0);
9273  unsigned NumElts = VT.getVectorNumElements();
9274  for (unsigned i = 1; i < NumElts; ++i)
9275  if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
9276  return false;
9277  ConstVal = FirstElt->getZExtValue();
9278  return true;
9279 }
9280 
9281 static unsigned getIntrinsicID(const SDNode *N) {
9282  unsigned Opcode = N->getOpcode();
9283  switch (Opcode) {
9284  default:
9285  return Intrinsic::not_intrinsic;
9286  case ISD::INTRINSIC_WO_CHAIN: {
9287  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9288  if (IID < Intrinsic::num_intrinsics)
9289  return IID;
9290  return Intrinsic::not_intrinsic;
9291  }
9292  }
9293 }
9294 
9295 // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
9296 // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
9297 // BUILD_VECTORs with constant element C1, C2 is a constant, and:
9298 // - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
9299 // - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
9300 // The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
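 // Illustrative example (hypothetical values): with 8-bit elements and C2 == 3,
 // the SLI form needs C1 == ~(0xff << 3) == 0x07, so
 // (or (and X, 0x07), (shl Y, 3)) can be rewritten as (SLI X, Y, #3), which
 // keeps the low three bits of X and inserts Y shifted left by three.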
 9301 static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
 9302  EVT VT = N->getValueType(0);
9303 
9304  if (!VT.isVector())
9305  return SDValue();
9306 
9307  SDLoc DL(N);
9308 
9309  SDValue And;
9310  SDValue Shift;
9311 
9312  SDValue FirstOp = N->getOperand(0);
9313  unsigned FirstOpc = FirstOp.getOpcode();
9314  SDValue SecondOp = N->getOperand(1);
9315  unsigned SecondOpc = SecondOp.getOpcode();
9316 
9317  // Is one of the operands an AND or a BICi? The AND may have been optimised to
9318  // a BICi in order to use an immediate instead of a register.
 9319  // Is the other operand a shl or lshr? This will have been turned into:
9320  // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
9321  if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
9322  (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
9323  And = FirstOp;
9324  Shift = SecondOp;
9325 
9326  } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
9327  (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
9328  And = SecondOp;
9329  Shift = FirstOp;
9330  } else
9331  return SDValue();
9332 
9333  bool IsAnd = And.getOpcode() == ISD::AND;
9334  bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
9335 
9336  // Is the shift amount constant?
9337  ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
9338  if (!C2node)
9339  return SDValue();
9340 
9341  uint64_t C1;
9342  if (IsAnd) {
9343  // Is the and mask vector all constant?
9344  if (!isAllConstantBuildVector(And.getOperand(1), C1))
9345  return SDValue();
9346  } else {
9347  // Reconstruct the corresponding AND immediate from the two BICi immediates.
9348  ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
9349  ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
9350  assert(C1nodeImm && C1nodeShift);
9351  C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
9352  }
9353 
9354  // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
9355  // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
9356  // how much one can shift elements of a particular size?
9357  uint64_t C2 = C2node->getZExtValue();
9358  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
9359  if (C2 > ElemSizeInBits)
9360  return SDValue();
9361 
9362  APInt C1AsAPInt(ElemSizeInBits, C1);
9363  APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
9364  : APInt::getLowBitsSet(ElemSizeInBits, C2);
9365  if (C1AsAPInt != RequiredC1)
9366  return SDValue();
9367 
9368  SDValue X = And.getOperand(0);
9369  SDValue Y = Shift.getOperand(0);
9370 
9371  unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
9372  SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
9373 
9374  LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
9375  LLVM_DEBUG(N->dump(&DAG));
9376  LLVM_DEBUG(dbgs() << "into: \n");
9377  LLVM_DEBUG(ResultSLI->dump(&DAG));
9378 
9379  ++NumShiftInserts;
9380  return ResultSLI;
9381 }
9382 
9383 SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
9384  SelectionDAG &DAG) const {
9385  if (useSVEForFixedLengthVectorVT(Op.getValueType()))
9386  return LowerToScalableOp(Op, DAG);
9387 
9388  // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
9389  if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
9390  return Res;
9391 
9392  EVT VT = Op.getValueType();
9393 
9394  SDValue LHS = Op.getOperand(0);
9395  BuildVectorSDNode *BVN =
9396  dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
9397  if (!BVN) {
9398  // OR commutes, so try swapping the operands.
9399  LHS = Op.getOperand(1);
9400  BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
9401  }
9402  if (!BVN)
9403  return Op;
9404 
9405  APInt DefBits(VT.getSizeInBits(), 0);
9406  APInt UndefBits(VT.getSizeInBits(), 0);
9407  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
9408  SDValue NewOp;
9409 
9410  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
9411  DefBits, &LHS)) ||
9412  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
9413  DefBits, &LHS)))
9414  return NewOp;
9415 
9416  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
9417  UndefBits, &LHS)) ||
9418  (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
9419  UndefBits, &LHS)))
9420  return NewOp;
9421  }
9422 
9423  // We can always fall back to a non-immediate OR.
9424  return Op;
9425 }
9426 
9427 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
9428 // be truncated to fit element width.
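 // For example (illustrative): a v8i8 BUILD_VECTOR lane holding the constant
 // 300 is rebuilt below as 300 & 0xff == 44, i.e. only the low 8 bits survive.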
 9429 static SDValue NormalizeBuildVector(SDValue Op,
 9430  SelectionDAG &DAG) {
9431  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
9432  SDLoc dl(Op);
9433  EVT VT = Op.getValueType();
 9434  EVT EltTy = VT.getVectorElementType();
9435 
9436  if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
9437  return Op;
9438 
 9439  SmallVector<SDValue, 16> Ops;
 9440  for (SDValue Lane : Op->ops()) {
9441  // For integer vectors, type legalization would have promoted the
9442  // operands already. Otherwise, if Op is a floating-point splat
9443  // (with operands cast to integers), then the only possibilities
9444  // are constants and UNDEFs.
9445  if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
9446  APInt LowBits(EltTy.getSizeInBits(),
9447  CstLane->getZExtValue());
9448  Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
9449  } else if (Lane.getNode()->isUndef()) {
9450  Lane = DAG.getUNDEF(MVT::i32);
9451  } else {
9452  assert(Lane.getValueType() == MVT::i32 &&
9453  "Unexpected BUILD_VECTOR operand type");
9454  }
9455  Ops.push_back(Lane);
9456  }
9457  return DAG.getBuildVector(VT, dl, Ops);
9458 }
9459 
 9460 static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
 9461  EVT VT = Op.getValueType();
9462 
9463  APInt DefBits(VT.getSizeInBits(), 0);
9464  APInt UndefBits(VT.getSizeInBits(), 0);
9465  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
9466  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
9467  SDValue NewOp;
9468  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
9469  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
9470  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
9471  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
9472  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
9473  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
9474  return NewOp;
9475 
9476  DefBits = ~DefBits;
9477  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
9478  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
9479  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
9480  return NewOp;
9481 
9482  DefBits = UndefBits;
9483  if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
9484  (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
9485  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
9486  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
9487  (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
9488  (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
9489  return NewOp;
9490 
9491  DefBits = ~UndefBits;
9492  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
9493  (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
9494  (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
9495  return NewOp;
9496  }
9497 
9498  return SDValue();
9499 }
9500 
9501 SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
9502  SelectionDAG &DAG) const {
9503  EVT VT = Op.getValueType();
9504 
9505  // Try to build a simple constant vector.
9506  Op = NormalizeBuildVector(Op, DAG);
9507  if (VT.isInteger()) {
9508  // Certain vector constants, used to express things like logical NOT and
9509  // arithmetic NEG, are passed through unmodified. This allows special
9510  // patterns for these operations to match, which will lower these constants
9511  // to whatever is proven necessary.
9512  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
9513  if (BVN->isConstant())
9514  if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
9515  unsigned BitSize = VT.getVectorElementType().getSizeInBits();
9516  APInt Val(BitSize,
9517  Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
9518  if (Val.isNullValue() || Val.isAllOnesValue())
9519  return Op;
9520  }
9521  }
9522 
9523  if (SDValue V = ConstantBuildVector(Op, DAG))
9524  return V;
9525 
9526  // Scan through the operands to find some interesting properties we can
9527  // exploit:
9528  // 1) If only one value is used, we can use a DUP, or
9529  // 2) if only the low element is not undef, we can just insert that, or
9530  // 3) if only one constant value is used (w/ some non-constant lanes),
9531  // we can splat the constant value into the whole vector then fill
9532  // in the non-constant lanes.
9533  // 4) FIXME: If different constant values are used, but we can intelligently
9534  // select the values we'll be overwriting for the non-constant
9535  // lanes such that we can directly materialize the vector
9536  // some other way (MOVI, e.g.), we can be sneaky.
9537  // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
9538  SDLoc dl(Op);
9539  unsigned NumElts = VT.getVectorNumElements();
9540  bool isOnlyLowElement = true;
9541  bool usesOnlyOneValue = true;
9542  bool usesOnlyOneConstantValue = true;
9543  bool isConstant = true;
9544  bool AllLanesExtractElt = true;
9545  unsigned NumConstantLanes = 0;
9546  unsigned NumDifferentLanes = 0;
9547  unsigned NumUndefLanes = 0;
9548  SDValue Value;
9549  SDValue ConstantValue;
9550  for (unsigned i = 0; i < NumElts; ++i) {
9551  SDValue V = Op.getOperand(i);
 9552  if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
 9553  AllLanesExtractElt = false;
9554  if (V.isUndef()) {
9555  ++NumUndefLanes;
9556  continue;
9557  }
9558  if (i > 0)
9559  isOnlyLowElement = false;
9560  if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
9561  isConstant = false;
9562 
9563  if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
9564  ++NumConstantLanes;
9565  if (!ConstantValue.getNode())
9566  ConstantValue = V;
9567  else if (ConstantValue != V)
9568  usesOnlyOneConstantValue = false;
9569  }
9570 
9571  if (!Value.getNode())
9572  Value = V;
9573  else if (V != Value) {
9574  usesOnlyOneValue = false;
9575  ++NumDifferentLanes;
9576  }
9577  }
9578 
9579  if (!Value.getNode()) {
9580  LLVM_DEBUG(
9581  dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
9582  return DAG.getUNDEF(VT);
9583  }
9584 
9585  // Convert BUILD_VECTOR where all elements but the lowest are undef into
9586  // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
9587  // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
9588  if (isOnlyLowElement && !(NumElts == 1 && isa<ConstantSDNode>(Value))) {
9589  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
9590  "SCALAR_TO_VECTOR node\n");
9591  return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
9592  }
9593 
9594  if (AllLanesExtractElt) {
9595  SDNode *Vector = nullptr;
9596  bool Even = false;
9597  bool Odd = false;
9598  // Check whether the extract elements match the Even pattern <0,2,4,...> or
9599  // the Odd pattern <1,3,5,...>.
9600  for (unsigned i = 0; i < NumElts; ++i) {
9601  SDValue V = Op.getOperand(i);
9602  const SDNode *N = V.getNode();
9603  if (!isa<ConstantSDNode>(N->getOperand(1)))
9604  break;
9605  SDValue N0 = N->getOperand(0);
9606 
9607  // All elements are extracted from the same vector.
9608  if (!Vector) {
9609  Vector = N0.getNode();
9610  // Check that the type of EXTRACT_VECTOR_ELT matches the type of
9611  // BUILD_VECTOR.
9612  if (VT.getVectorElementType() !=
 9613  N0.getValueType().getVectorElementType())
 9614  break;
9615  } else if (Vector != N0.getNode()) {
9616  Odd = false;
9617  Even = false;
9618  break;
9619  }
9620 
9621  // Extracted values are either at Even indices <0,2,4,...> or at Odd
9622  // indices <1,3,5,...>.
9623  uint64_t Val = N->getConstantOperandVal(1);
9624  if (Val == 2 * i) {
9625  Even = true;
9626  continue;
9627  }
9628  if (Val - 1 == 2 * i) {
9629  Odd = true;
9630  continue;
9631  }
9632 
9633  // Something does not match: abort.
9634  Odd = false;
9635  Even = false;
9636  break;
9637  }
9638  if (Even || Odd) {
9639  SDValue LHS =
9640  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
9641  DAG.getConstant(0, dl, MVT::i64));
9642  SDValue RHS =
9643  DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
9644  DAG.getConstant(NumElts, dl, MVT::i64));
9645 
9646  if (Even && !Odd)
9647  return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
9648  RHS);
9649  if (Odd && !Even)
9650  return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
9651  RHS);
9652  }
9653  }
9654 
9655  // Use DUP for non-constant splats. For f32 constant splats, reduce to
9656  // i32 and try again.
9657  if (usesOnlyOneValue) {
9658  if (!isConstant) {
9659  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
9660  Value.getValueType() != VT) {
9661  LLVM_DEBUG(
9662  dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
9663  return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
9664  }
9665 
9666  // This is actually a DUPLANExx operation, which keeps everything vectory.
9667 
9668  SDValue Lane = Value.getOperand(1);
9669  Value = Value.getOperand(0);
9670  if (Value.getValueSizeInBits() == 64) {
9671  LLVM_DEBUG(
9672  dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
9673  "widening it\n");
9674  Value = WidenVector(Value, DAG);
9675  }
9676 
9677  unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
9678  return DAG.getNode(Opcode, dl, VT, Value, Lane);
9679  }
9680 
 9681  if (VT.getVectorElementType().isFloatingPoint()) {
 9682  SmallVector<SDValue, 8> Ops;
 9683  EVT EltTy = VT.getVectorElementType();
9684  assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
9685  EltTy == MVT::f64) && "Unsupported floating-point vector type");
9686  LLVM_DEBUG(
9687  dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
9688  "BITCASTS, and try again\n");
9689  MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
9690  for (unsigned i = 0; i < NumElts; ++i)
9691  Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
9692  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
9693  SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
9694  LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
9695  Val.dump(););
9696  Val = LowerBUILD_VECTOR(Val, DAG);
9697  if (Val.getNode())
9698  return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9699  }
9700  }
9701 
9702  // If we need to insert a small number of different non-constant elements and
9703  // the vector width is sufficiently large, prefer using DUP with the common
9704  // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
9705  // skip the constant lane handling below.
9706  bool PreferDUPAndInsert =
9707  !isConstant && NumDifferentLanes >= 1 &&
9708  NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
9709  NumDifferentLanes >= NumConstantLanes;
9710 
9711  // If there was only one constant value used and for more than one lane,
9712  // start by splatting that value, then replace the non-constant lanes. This
9713  // is better than the default, which will perform a separate initialization
9714  // for each lane.
9715  if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
9716  // Firstly, try to materialize the splat constant.
9717  SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
9718  Val = ConstantBuildVector(Vec, DAG);
9719  if (!Val) {
9720  // Otherwise, materialize the constant and splat it.
9721  Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
9722  DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
9723  }
9724 
9725  // Now insert the non-constant lanes.
9726  for (unsigned i = 0; i < NumElts; ++i) {
9727  SDValue V = Op.getOperand(i);
9728  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
9729  if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V))
9730  // Note that type legalization likely mucked about with the VT of the
9731  // source operand, so we may have to convert it here before inserting.
9732  Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
9733  }
9734  return Val;
9735  }
9736 
9737  // This will generate a load from the constant pool.
9738  if (isConstant) {
9739  LLVM_DEBUG(
9740  dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
9741  "expansion\n");
9742  return SDValue();
9743  }
9744 
9745  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
9746  if (NumElts >= 4) {
9747  if (SDValue shuffle = ReconstructShuffle(Op, DAG))
9748  return shuffle;
9749  }
9750 
9751  if (PreferDUPAndInsert) {
9752  // First, build a constant vector with the common element.
 9753  SmallVector<SDValue, 8> Ops;
 9754  for (unsigned I = 0; I < NumElts; ++I)
9755  Ops.push_back(Value);
9756  SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
9757  // Next, insert the elements that do not match the common value.
9758  for (unsigned I = 0; I < NumElts; ++I)
9759  if (Op.getOperand(I) != Value)
9760  NewVector =
9761  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
9762  Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
9763 
9764  return NewVector;
9765  }
9766 
9767  // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
9768  // know the default expansion would otherwise fall back on something even
9769  // worse. For a vector with one or two non-undef values, that's
9770  // scalar_to_vector for the elements followed by a shuffle (provided the
9771  // shuffle is valid for the target) and materialization element by element
9772  // on the stack followed by a load for everything else.
9773  if (!isConstant && !usesOnlyOneValue) {
9774  LLVM_DEBUG(
9775  dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
9776  "of INSERT_VECTOR_ELT\n");
9777 
9778  SDValue Vec = DAG.getUNDEF(VT);
9779  SDValue Op0 = Op.getOperand(0);
9780  unsigned i = 0;
9781 
9782  // Use SCALAR_TO_VECTOR for lane zero to
9783  // a) Avoid a RMW dependency on the full vector register, and
9784  // b) Allow the register coalescer to fold away the copy if the
9785  // value is already in an S or D register, and we're forced to emit an
9786  // INSERT_SUBREG that we can't fold anywhere.
9787  //
9788  // We also allow types like i8 and i16 which are illegal scalar but legal
9789  // vector element types. After type-legalization the inserted value is
9790  // extended (i32) and it is safe to cast them to the vector type by ignoring
9791  // the upper bits of the lowest lane (e.g. v8i8, v4i16).
9792  if (!Op0.isUndef()) {
9793  LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
9794  Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
9795  ++i;
9796  }
9797  LLVM_DEBUG(if (i < NumElts) dbgs()
9798  << "Creating nodes for the other vector elements:\n";);
9799  for (; i < NumElts; ++i) {
9800  SDValue V = Op.getOperand(i);
9801  if (V.isUndef())
9802  continue;
9803  SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
9804  Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
9805  }
9806  return Vec;
9807  }
9808 
9809  LLVM_DEBUG(
9810  dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
9811  "better alternative\n");
9812  return SDValue();
9813 }
9814 
9815 SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
9816  SelectionDAG &DAG) const {
9817  assert(Op.getValueType().isScalableVector() &&
9818  isTypeLegal(Op.getValueType()) &&
9819  "Expected legal scalable vector type!");
9820 
9821  if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
9822  return Op;
9823 
9824  return SDValue();
9825 }
9826 
9827 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9828  SelectionDAG &DAG) const {
9829  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
9830 
9831  // Check for non-constant or out of range lane.
9832  EVT VT = Op.getOperand(0).getValueType();
9833  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
9834  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
9835  return SDValue();
9836 
9837 
9838  // Insertion/extraction are legal for V128 types.
9839  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
9840  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
9841  VT == MVT::v8f16 || VT == MVT::v8bf16)
9842  return Op;
9843 
9844  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
9845  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
9846  VT != MVT::v4bf16)
9847  return SDValue();
9848 
9849  // For V64 types, we perform insertion by expanding the value
9850  // to a V128 type and perform the insertion on that.
9851  SDLoc DL(Op);
9852  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
9853  EVT WideTy = WideVec.getValueType();
9854 
9855  SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
9856  Op.getOperand(1), Op.getOperand(2));
9857  // Re-narrow the resultant vector.
9858  return NarrowVector(Node, DAG);
9859 }
9860 
9861 SDValue
9862 AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
9863  SelectionDAG &DAG) const {
9864  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
9865 
9866  // Check for non-constant or out of range lane.
9867  EVT VT = Op.getOperand(0).getValueType();
9868  ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
9869  if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
9870  return SDValue();
9871 
9872 
9873  // Insertion/extraction are legal for V128 types.
9874  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
9875  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
9876  VT == MVT::v8f16 || VT == MVT::v8bf16)
9877  return Op;
9878 
9879  if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
9880  VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
9881  VT != MVT::v4bf16)
9882  return SDValue();
9883 
9884  // For V64 types, we perform extraction by expanding the value
9885  // to a V128 type and perform the extraction on that.
9886  SDLoc DL(Op);
9887  SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
9888  EVT WideTy = WideVec.getValueType();
9889 
9890  EVT ExtrTy = WideTy.getVectorElementType();
9891  if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
9892  ExtrTy = MVT::i32;
9893 
9894  // For extractions, we just return the result directly.
9895  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
9896  Op.getOperand(1));
9897 }
9898 
9899 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
9900  SelectionDAG &DAG) const {
9901  assert(Op.getValueType().isFixedLengthVector() &&
9902  "Only cases that extract a fixed length vector are supported!");
9903 
9904  EVT InVT = Op.getOperand(0).getValueType();
9905  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
9906  unsigned Size = Op.getValueSizeInBits();
9907 
9908  if (InVT.isScalableVector()) {
9909  // This will be matched by custom code during ISelDAGToDAG.
9910  if (Idx == 0 && isPackedVectorType(InVT, DAG))
9911  return Op;
9912 
9913  return SDValue();
9914  }
9915 
9916  // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
9917  if (Idx == 0 && InVT.getSizeInBits() <= 128)
9918  return Op;
9919 
9920  // If this is extracting the upper 64-bits of a 128-bit vector, we match
9921  // that directly.
9922  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
9923  InVT.getSizeInBits() == 128)
9924  return Op;
9925 
9926  return SDValue();
9927 }
9928 
9929 SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
9930  SelectionDAG &DAG) const {
9931  assert(Op.getValueType().isScalableVector() &&
9932  "Only expect to lower inserts into scalable vectors!");
9933 
9934  EVT InVT = Op.getOperand(1).getValueType();
9935  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
9936 
9937  if (InVT.isScalableVector()) {
9938  SDLoc DL(Op);
9939  EVT VT = Op.getValueType();
9940 
9941  if (!isTypeLegal(VT) || !VT.isInteger())
9942  return SDValue();
9943 
9944  SDValue Vec0 = Op.getOperand(0);
9945  SDValue Vec1 = Op.getOperand(1);
9946 
9947  // Ensure the subvector is half the size of the main vector.
9948  if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
9949  return SDValue();
9950 
9951  // Extend elements of smaller vector...
9952  EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
9953  SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
9954 
9955  if (Idx == 0) {
9956  SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
9957  return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
9958  } else if (Idx == InVT.getVectorMinNumElements()) {
9959  SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
9960  return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
9961  }
9962 
9963  return SDValue();
9964  }
9965 
9966  // This will be matched by custom code during ISelDAGToDAG.
9967  if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
9968  return Op;
9969 
9970  return SDValue();
9971 }
9972 
9973 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
9974  EVT VT = Op.getValueType();
9975 
9976  if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
9977  return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
9978 
9979  assert(VT.isScalableVector() && "Expected a scalable vector.");
9980 
9981  bool Signed = Op.getOpcode() == ISD::SDIV;
9982  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
9983 
9984  if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
9985  return LowerToPredicatedOp(Op, DAG, PredOpcode);
9986 
9987  // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
9988  // operations, and truncate the result.
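 // For example, an nxv16i8 SDIV is sign-unpacked into two nxv8i16 halves below;
 // each half comes back through this lowering and widens again to nxv4i32
 // SDIV_PRED operations, and the halves are finally re-packed with UZP1.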
9989  EVT WidenedVT;
9990  if (VT == MVT::nxv16i8)
9991  WidenedVT = MVT::nxv8i16;
9992  else if (VT == MVT::nxv8i16)
9993  WidenedVT = MVT::nxv4i32;
9994  else
9995  llvm_unreachable("Unexpected Custom DIV operation");
9996 
9997  SDLoc dl(Op);
9998  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
9999  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
10000  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
10001  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
10002  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
10003  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
10004  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10005  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10006  return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
10007 }
10008 
 10009 bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
 10010  // Currently no fixed length shuffles that require SVE are legal.
10011  if (useSVEForFixedLengthVectorVT(VT))
10012  return false;
10013 
10014  if (VT.getVectorNumElements() == 4 &&
10015  (VT.is128BitVector() || VT.is64BitVector())) {
10016  unsigned PFIndexes[4];
10017  for (unsigned i = 0; i != 4; ++i) {
10018  if (M[i] < 0)
10019  PFIndexes[i] = 8;
10020  else
10021  PFIndexes[i] = M[i];
10022  }
10023 
10024  // Compute the index in the perfect shuffle table.
10025  unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10026  PFIndexes[2] * 9 + PFIndexes[3];
10027  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10028  unsigned Cost = (PFEntry >> 30);
10029 
10030  if (Cost <= 4)
10031  return true;
10032  }
10033 
10034  bool DummyBool;
10035  int DummyInt;
10036  unsigned DummyUnsigned;
10037 
10038  return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
10039  isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
10040  isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
10041  // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
10042  isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
10043  isZIPMask(M, VT, DummyUnsigned) ||
10044  isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
10045  isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
10046  isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
10047  isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
10048  isConcatMask(M, VT, VT.getSizeInBits() == 128));
10049 }
10050 
10051 /// getVShiftImm - Check if this is a valid build_vector for the immediate
10052 /// operand of a vector shift operation, where all the elements of the
10053 /// build_vector must have the same constant integer value.
10054 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
10055  // Ignore bit_converts.
10056  while (Op.getOpcode() == ISD::BITCAST)
10057  Op = Op.getOperand(0);
10058  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
10059  APInt SplatBits, SplatUndef;
10060  unsigned SplatBitSize;
10061  bool HasAnyUndefs;
10062  if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
10063  HasAnyUndefs, ElementBits) ||
10064  SplatBitSize > ElementBits)
10065  return false;
10066  Cnt = SplatBits.getSExtValue();
10067  return true;
10068 }
10069 
10070 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
10071 /// operand of a vector shift left operation. That value must be in the range:
10072 /// 0 <= Value < ElementBits for a left shift; or
10073 /// 0 <= Value <= ElementBits for a long left shift.
10074 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
10075  assert(VT.isVector() && "vector shift count is not a vector type");
10076  int64_t ElementBits = VT.getScalarSizeInBits();
10077  if (!getVShiftImm(Op, ElementBits, Cnt))
10078  return false;
10079  return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
10080 }
10081 
10082 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
10083 /// operand of a vector shift right operation. The value must be in the range:
 10084 ///   1 <= Value <= ElementBits for a right shift, or 1 <= Value <= ElementBits/2 for a narrowing right shift.
10085 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
10086  assert(VT.isVector() && "vector shift count is not a vector type");
10087  int64_t ElementBits = VT.getScalarSizeInBits();
10088  if (!getVShiftImm(Op, ElementBits, Cnt))
10089  return false;
10090  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
10091 }
10092 
10093 SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
10094  SelectionDAG &DAG) const {
10095  EVT VT = Op.getValueType();
10096 
10097  if (VT.getScalarType() == MVT::i1) {
10098  // Lower i1 truncate to `(x & 1) != 0`.
10099  SDLoc dl(Op);
10100  EVT OpVT = Op.getOperand(0).getValueType();
10101  SDValue Zero = DAG.getConstant(0, dl, OpVT);
10102  SDValue One = DAG.getConstant(1, dl, OpVT);
10103  SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
10104  return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
10105  }
10106 
10107  if (!VT.isVector() || VT.isScalableVector())
10108  return SDValue();
10109 
10110  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10111  return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
10112 
10113  return SDValue();
10114 }
10115 
10116 SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
10117  SelectionDAG &DAG) const {
10118  EVT VT = Op.getValueType();
10119  SDLoc DL(Op);
10120  int64_t Cnt;
10121 
10122  if (!Op.getOperand(1).getValueType().isVector())
10123  return Op;
10124  unsigned EltSize = VT.getScalarSizeInBits();
10125 
10126  switch (Op.getOpcode()) {
10127  default:
10128  llvm_unreachable("unexpected shift opcode");
10129 
10130  case ISD::SHL:
10131  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
10132  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
10133 
10134  if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
10135  return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
10136  DAG.getConstant(Cnt, DL, MVT::i32));
10137  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10138  DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
10139  MVT::i32),
10140  Op.getOperand(0), Op.getOperand(1));
10141  case ISD::SRA:
10142  case ISD::SRL:
10143  if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
10144  unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
 10145  : AArch64ISD::SRL_PRED;
 10146  return LowerToPredicatedOp(Op, DAG, Opc);
10147  }
10148 
10149  // Right shift immediate
10150  if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
10151  unsigned Opc =
10152  (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
10153  return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
10154  DAG.getConstant(Cnt, DL, MVT::i32));
10155  }
10156 
10157  // Right shift register. Note, there is not a shift right register
10158  // instruction, but the shift left register instruction takes a signed
10159  // value, where negative numbers specify a right shift.
10160  unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
10161  : Intrinsic::aarch64_neon_ushl;
10162  // negate the shift amount
10163  SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
10164  SDValue NegShiftLeft =
 10165  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
 10166  DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
10167  NegShift);
10168  return NegShiftLeft;
10169  }
10170 
10171  return SDValue();
10172 }
10173 
 10174 static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
 10175  AArch64CC::CondCode CC, bool NoNans, EVT VT,
10176  const SDLoc &dl, SelectionDAG &DAG) {
10177  EVT SrcVT = LHS.getValueType();
10178  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
10179  "function only supposed to emit natural comparisons");
10180 
10181  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10182  APInt CnstBits(VT.getSizeInBits(), 0);
10183  APInt UndefBits(VT.getSizeInBits(), 0);
10184  bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
10185  bool IsZero = IsCnst && (CnstBits == 0);
10186 
10187  if (SrcVT.getVectorElementType().isFloatingPoint()) {
10188  switch (CC) {
10189  default:
10190  return SDValue();
10191  case AArch64CC::NE: {
10192  SDValue Fcmeq;
10193  if (IsZero)
10194  Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10195  else
10196  Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10197  return DAG.getNOT(dl, Fcmeq, VT);
10198  }
10199  case AArch64CC::EQ:
10200  if (IsZero)
10201  return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10202  return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10203  case AArch64CC::GE:
10204  if (IsZero)
10205  return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
10206  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
10207  case AArch64CC::GT:
10208  if (IsZero)
10209  return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
10210  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
10211  case AArch64CC::LS:
10212  if (IsZero)
10213  return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
10214  return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
10215  case AArch64CC::LT:
10216  if (!NoNans)
10217  return SDValue();
 10218  // If we ignore NaNs then we can use the MI implementation.
 10219  LLVM_FALLTHROUGH;
 10220  case AArch64CC::MI:
10221  if (IsZero)
10222  return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
10223  return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
10224  }
10225  }
10226 
10227  switch (CC) {
10228  default:
10229  return SDValue();
10230  case AArch64CC::NE: {
10231  SDValue Cmeq;
10232  if (IsZero)
10233  Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10234  else
10235  Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10236  return DAG.getNOT(dl, Cmeq, VT);
10237  }
10238  case AArch64CC::EQ:
10239  if (IsZero)
10240  return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10241  return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10242  case AArch64CC::GE:
10243  if (IsZero)
10244  return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
10245  return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
10246  case AArch64CC::GT:
10247  if (IsZero)
10248  return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
10249  return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
10250  case AArch64CC::LE:
10251  if (IsZero)
10252  return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
10253  return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
10254  case AArch64CC::LS:
10255  return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
10256  case AArch64CC::LO:
10257  return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
10258  case AArch64CC::LT:
10259  if (IsZero)
10260  return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
10261  return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
10262  case AArch64CC::HI:
10263  return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
10264  case AArch64CC::HS:
10265  return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
10266  }
10267 }
10268 
10269 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
10270  SelectionDAG &DAG) const {
10271  if (Op.getValueType().isScalableVector()) {
10272  if (Op.getOperand(0).getValueType().isFloatingPoint())
10273  return Op;
10274  return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
10275  }
10276 
10277  if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10278  return LowerFixedLengthVectorSetccToSVE(Op, DAG);
10279 
10280  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
10281  SDValue LHS = Op.getOperand(0);
10282  SDValue RHS = Op.getOperand(1);
10283  EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
10284  SDLoc dl(Op);
10285 
10286  if (LHS.getValueType().getVectorElementType().isInteger()) {
10287  assert(LHS.getValueType() == RHS.getValueType());
10288  AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
10289  SDValue Cmp =
10290  EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
10291  return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
10292  }
10293 
10294  const bool FullFP16 =
10295  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
10296 
10297  // Make v4f16 (only) fcmp operations utilise vector instructions
10298  // v8f16 support will be a little more complicated
10299  if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
10300  if (LHS.getValueType().getVectorNumElements() == 4) {
10301  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
10302  RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
10303  SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
10304  DAG.ReplaceAllUsesWith(Op, NewSetcc);
10305  CmpVT = MVT::v4i32;
10306  } else
10307  return SDValue();
10308  }
10309 
10310  assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
10311  LHS.getValueType().getVectorElementType() != MVT::f128);
10312 
10313  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
10314  // clean. Some of them require two branches to implement.
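 // For example, an ordered not-equal compare (ISD::SETONE) has no single
 // AArch64 vector condition; it is mapped to two codes (roughly "greater
 // than" and "less than"), and the two comparison results are ORed together
 // below.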
10315  AArch64CC::CondCode CC1, CC2;
10316  bool ShouldInvert;
10317  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
10318 
10319  bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
10320  SDValue Cmp =
10321  EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
10322  if (!Cmp.getNode())
10323  return SDValue();
10324 
10325  if (CC2 != AArch64CC::AL) {
10326  SDValue Cmp2 =
10327  EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
10328  if (!Cmp2.getNode())
10329  return SDValue();
10330 
10331  Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
10332  }
10333 
10334  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
10335 
10336  if (ShouldInvert)
10337  Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
10338 
10339  return Cmp;
10340 }
10341 
10342 static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
10343  SelectionDAG &DAG) {
10344  SDValue VecOp = ScalarOp.getOperand(0);
10345  auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
10346  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
10347  DAG.getConstant(0, DL, MVT::i64));
10348 }
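// As a rough illustration, an i32 add reduction over a v4i32 input becomes a
// UADDV-style node followed by an extract of lane 0, which typically selects
// to something like:
//   addv s0, v0.4s
//   fmov w0, s0
// (register choices are illustrative only).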
10349 
10350 SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
10351  SelectionDAG &DAG) const {
10352  SDValue Src = Op.getOperand(0);
10353 
10354  // Try to lower fixed length reductions to SVE.
10355  EVT SrcVT = Src.getValueType();
10356  bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
10357  Op.getOpcode() == ISD::VECREDUCE_OR ||
10358  Op.getOpcode() == ISD::VECREDUCE_XOR ||
10359  Op.getOpcode() == ISD::VECREDUCE_FADD ||
10360  (Op.getOpcode() != ISD::VECREDUCE_ADD &&
10361  SrcVT.getVectorElementType() == MVT::i64);
10362  if (SrcVT.isScalableVector() ||
10363  useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
10364 
10365  if (SrcVT.getVectorElementType() == MVT::i1)
10366  return LowerPredReductionToSVE(Op, DAG);
10367 
10368  switch (Op.getOpcode()) {
10369  case ISD::VECREDUCE_ADD:
10370  return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
10371  case ISD::VECREDUCE_AND:
10372  return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
10373  case ISD::VECREDUCE_OR:
10374  return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
10375  case ISD::VECREDUCE_SMAX:
10376  return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
10377  case ISD::VECREDUCE_SMIN:
10378  return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
10379  case ISD::VECREDUCE_UMAX:
10380  return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
10381  case ISD::VECREDUCE_UMIN:
10382  return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
10383  case ISD::VECREDUCE_XOR:
10384  return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
10385  case ISD::VECREDUCE_FADD:
10386  return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
10387  case ISD::VECREDUCE_FMAX:
10388  return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
10389  case ISD::VECREDUCE_FMIN:
10390  return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
10391  default:
10392  llvm_unreachable("Unhandled fixed length reduction");
10393  }
10394  }
10395 
10396  // Lower NEON reductions.
10397  SDLoc dl(Op);
10398  switch (Op.getOpcode()) {
10399  case ISD::VECREDUCE_ADD:
10400  return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
10401  case ISD::VECREDUCE_SMAX:
10402  return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
10403  case ISD::VECREDUCE_SMIN:
10404  return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
10405  case ISD::VECREDUCE_UMAX:
10406  return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
10407  case ISD::VECREDUCE_UMIN:
10408  return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
10409  case ISD::VECREDUCE_FMAX: {
10410  return DAG.getNode(
10411  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
10412  DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
10413  Src);
10414  }
10415  case ISD::VECREDUCE_FMIN: {
10416  return DAG.getNode(
10417  ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
10418  DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
10419  Src);
10420  }
10421  default:
10422  llvm_unreachable("Unhandled reduction");
10423  }
10424 }
10425 
10426 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
10427  SelectionDAG &DAG) const {
10428  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
10429  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
10430  return SDValue();
10431 
10432  // LSE has an atomic load-add instruction, but not a load-sub.
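  // For example, "atomicrmw sub i64* %p, i64 %x" is rewritten here as an
  // atomic load-add of (0 - %x), which can then be selected to an instruction
  // from the LSE LDADD family (the exact variant depends on the requested
  // memory ordering).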
10433  SDLoc dl(Op);
10434  MVT VT = Op.getSimpleValueType();
10435  SDValue RHS = Op.getOperand(2);
10436  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
10437  RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
10438  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
10439  Op.getOperand(0), Op.getOperand(1), RHS,
10440  AN->getMemOperand());
10441 }
10442 
10443 SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
10444  SelectionDAG &DAG) const {
10445  auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
10446  if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
10447  return SDValue();
10448 
10449  // LSE has an atomic load-clear instruction, but not a load-and.
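  // For example, "atomicrmw and i32* %p, i32 %x" becomes an atomic load-clear
  // of ~%x: LDCLR ANDs memory with the complement of its operand, so clearing
  // with ~%x is equivalent to ANDing with %x.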
10450  SDLoc dl(Op);
10451  MVT VT = Op.getSimpleValueType();
10452  SDValue RHS = Op.getOperand(2);
10453  AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
10454  RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
10455  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
10456  Op.getOperand(0), Op.getOperand(1), RHS,
10457  AN->getMemOperand());
10458 }
10459 
10460 SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
10461  SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
10462  SDLoc dl(Op);
10463  EVT PtrVT = getPointerTy(DAG.getDataLayout());
10464  SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
10465 
10466  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
10467  const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
10468  if (Subtarget->hasCustomCallingConv())
10469  TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
10470 
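  // __chkstk on Windows/AArch64 expects the requested size in X15, expressed
  // in units of 16 bytes, so the byte count is divided by 16 before the call
  // and scaled back up afterwards.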
10471  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
10472  DAG.getConstant(4, dl, MVT::i64));
10473  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
10474  Chain =
10475  DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
10476  Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
10477  DAG.getRegisterMask(Mask), Chain.getValue(1));
10478  // To match the actual intent better, we should read the output from X15 here
10479  // again (instead of potentially spilling it to the stack), but rereading Size
10480  // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
10481  // here.
10482 
10483  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
10484  DAG.getConstant(4, dl, MVT::i64));
10485  return Chain;
10486 }
10487 
10488 SDValue
10489 AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
10490  SelectionDAG &DAG) const {
10491  assert(Subtarget->isTargetWindows() &&
10492  "Only Windows alloca probing supported");
10493  SDLoc dl(Op);
10494  // Get the inputs.
10495  SDNode *Node = Op.getNode();
10496  SDValue Chain = Op.getOperand(0);
10497  SDValue Size = Op.getOperand(1);
10498  MaybeAlign Align =
10499  cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
10500  EVT VT = Node->getValueType(0);
10501 
10502  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
10503  "no-stack-arg-probe")) {
10504  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
10505  Chain = SP.getValue(1);
10506  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
10507  if (Align)
10508  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
10509  DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
10510  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
10511  SDValue Ops[2] = {SP, Chain};
10512  return DAG.getMergeValues(Ops, dl);
10513  }
10514 
10515  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
10516 
10517  Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
10518 
10519  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
10520  Chain = SP.getValue(1);
10521  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
10522  if (Align)
10523  SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
10524  DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
10525  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
10526 
10527  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
10528  DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
10529 
10530  SDValue Ops[2] = {SP, Chain};
10531  return DAG.getMergeValues(Ops, dl);
10532 }
10533 
10534 SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
10535  SelectionDAG &DAG) const {
10536  EVT VT = Op.getValueType();
10537  assert(VT != MVT::i64 && "Expected illegal VSCALE node");
10538 
10539  SDLoc DL(Op);
10540  APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
10541  return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
10542  DL, VT);
10543 }
10544 
10545 /// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
10546 template <unsigned NumVecs>
10547 static bool
10548 setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
10549  AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
10550  Info.opc = ISD::INTRINSIC_VOID;
10551  // Retrieve EC from first vector argument.
10552  const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
10553  ElementCount EC = VT.getVectorElementCount();
10554 #ifndef NDEBUG
10555  // Check the assumption that all input vectors are the same type.
10556  for (unsigned I = 0; I < NumVecs; ++I)
10557  assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
10558  "Invalid type.");
10559 #endif
10560  // memVT is `NumVecs * VT`.
10561  Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
10562  EC * NumVecs);
10563  Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
10564  Info.offset = 0;
10565  Info.align.reset();
10566  Info.flags = MachineMemOperand::MOStore;
10567  return true;
10568 }
10569 
10570 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
10571 /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
10572 /// specified in the intrinsic calls.
10573 bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
10574  const CallInst &I,
10575  MachineFunction &MF,
10576  unsigned Intrinsic) const {
10577  auto &DL = I.getModule()->getDataLayout();
10578  switch (Intrinsic) {
10579  case Intrinsic::aarch64_sve_st2:
10580  return setInfoSVEStN<2>(*this, DL, Info, I);
10581  case Intrinsic::aarch64_sve_st3:
10582  return setInfoSVEStN<3>(*this, DL, Info, I);
10583  case Intrinsic::aarch64_sve_st4:
10584  return setInfoSVEStN<4>(*this, DL, Info, I);
10585  case Intrinsic::aarch64_neon_ld2:
10586  case Intrinsic::aarch64_neon_ld3:
10587  case Intrinsic::aarch64_neon_ld4:
10588  case Intrinsic::aarch64_neon_ld1x2:
10589  case Intrinsic::aarch64_neon_ld1x3:
10590  case Intrinsic::aarch64_neon_ld1x4:
10591  case Intrinsic::aarch64_neon_ld2lane:
10592  case Intrinsic::aarch64_neon_ld3lane:
10593  case Intrinsic::aarch64_neon_ld4lane:
10594  case Intrinsic::aarch64_neon_ld2r:
10595  case Intrinsic::aarch64_neon_ld3r:
10596  case Intrinsic::aarch64_neon_ld4r: {
10597  Info.opc = ISD::INTRINSIC_W_CHAIN;
10598  // Conservatively set memVT to the entire set of vectors loaded.
10599  uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
10600  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
10601  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
10602  Info.offset = 0;
10603  Info.align.reset();
10604  // volatile loads with NEON intrinsics not supported
10605  Info.flags = MachineMemOperand::MOLoad;
10606  return true;
10607  }
10608  case Intrinsic::aarch64_neon_st2:
10609  case Intrinsic::aarch64_neon_st3:
10610  case Intrinsic::aarch64_neon_st4:
10611  case Intrinsic::aarch64_neon_st1x2:
10612  case Intrinsic::aarch64_neon_st1x3:
10613  case Intrinsic::aarch64_neon_st1x4:
10614  case Intrinsic::aarch64_neon_st2lane:
10615  case Intrinsic::aarch64_neon_st3lane:
10616  case Intrinsic::aarch64_neon_st4lane: {
10617  Info.opc = ISD::INTRINSIC_VOID;
10618  // Conservatively set memVT to the entire set of vectors stored.
10619  unsigned NumElts = 0;
10620  for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
10621  Type *ArgTy = I.getArgOperand(ArgI)->getType();
10622  if (!ArgTy->isVectorTy())
10623  break;
10624  NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
10625  }
10626  Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
10627  Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
10628  Info.offset = 0;
10629  Info.align.reset();
10630  // volatile stores with NEON intrinsics not supported
10631  Info.flags = MachineMemOperand::MOStore;
10632  return true;
10633  }
10634  case Intrinsic::aarch64_ldaxr:
10635  case Intrinsic::aarch64_ldxr: {
10636  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
10637  Info.opc = ISD::INTRINSIC_W_CHAIN;
10638  Info.memVT = MVT::getVT(PtrTy->getElementType());
10639  Info.ptrVal = I.getArgOperand(0);
10640  Info.offset = 0;
10641  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
10642  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
10643  return true;
10644  }
10645  case Intrinsic::aarch64_stlxr:
10646  case Intrinsic::aarch64_stxr: {
10647  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
10648  Info.opc = ISD::INTRINSIC_W_CHAIN;
10649  Info.memVT = MVT::getVT(PtrTy->getElementType());
10650  Info.ptrVal = I.getArgOperand(1);
10651  Info.offset = 0;
10652  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
10653  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
10654  return true;
10655  }
10656  case Intrinsic::aarch64_ldaxp:
10657  case Intrinsic::aarch64_ldxp:
10658  Info.opc = ISD::INTRINSIC_W_CHAIN;
10659  Info.memVT = MVT::i128;
10660  Info.ptrVal = I.getArgOperand(0);
10661  Info.offset = 0;
10662  Info.align = Align(16);
10663  Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
10664  return true;
10665  case Intrinsic::aarch64_stlxp:
10666  case Intrinsic::aarch64_stxp:
10667  Info.opc = ISD::INTRINSIC_W_CHAIN;
10668  Info.memVT = MVT::i128;
10669  Info.ptrVal = I.getArgOperand(2);
10670  Info.offset = 0;
10671  Info.align = Align(16);
10672  Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
10673  return true;
10674  case Intrinsic::aarch64_sve_ldnt1: {
10675  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
10676  Info.opc = ISD::INTRINSIC_W_CHAIN;
10677  Info.memVT = MVT::getVT(I.getType());
10678  Info.ptrVal = I.getArgOperand(1);
10679  Info.offset = 0;
10680  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
10681  Info.flags = MachineMemOperand::MOLoad;
10682  if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
10683  Info.flags |= MachineMemOperand::MONonTemporal;
10684  return true;
10685  }
10686  case Intrinsic::aarch64_sve_stnt1: {
10687  PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
10689  Info.memVT = MVT::getVT(I.getOperand(0)->getType());
10690  Info.ptrVal = I.getArgOperand(2);
10691  Info.offset = 0;
10692  Info.align = DL.getABITypeAlign(PtrTy->getElementType());
10693  Info.flags = MachineMemOperand::MOStore;
10694  if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
10695  Info.flags |= MachineMemOperand::MONonTemporal;
10696  return true;
10697  }
10698  default:
10699  break;
10700  }
10701 
10702  return false;
10703 }
10704 
10705 bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
10706  ISD::LoadExtType ExtTy,
10707  EVT NewVT) const {
10708  // TODO: This may be worth removing. Check regression tests for diffs.
10709  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
10710  return false;
10711 
10712  // If we're reducing the load width in order to avoid having to use an extra
10713  // instruction to do extension then it's probably a good idea.
10714  if (ExtTy != ISD::NON_EXTLOAD)
10715  return true;
10716  // Don't reduce load width if it would prevent us from combining a shift into
10717  // the offset.
10718  MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
10719  assert(Mem);
10720  const SDValue &Base = Mem->getBasePtr();
10721  if (Base.getOpcode() == ISD::ADD &&
10722  Base.getOperand(1).getOpcode() == ISD::SHL &&
10723  Base.getOperand(1).hasOneUse() &&
10724  Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
10725  // The shift can be combined if it matches the size of the value being
10726  // loaded (and so reducing the width would make it not match).
10727  uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
10728  uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
10729  if (ShiftAmount == Log2_32(LoadBytes))
10730  return false;
10731  }
10732  // We have no reason to disallow reducing the load width, so allow it.
10733  return true;
10734 }
10735 
10736 // Truncations from 64-bit GPR to 32-bit GPR are free.
10737 bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
10738  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
10739  return false;
10740  uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
10741  uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
10742  return NumBits1 > NumBits2;
10743 }
10744 bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
10745  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
10746  return false;
10747  uint64_t NumBits1 = VT1.getFixedSizeInBits();
10748  uint64_t NumBits2 = VT2.getFixedSizeInBits();
10749  return NumBits1 > NumBits2;
10750 }
10751 
10752 /// Check if it is profitable to hoist an instruction from then/else into if.
10753 /// Not profitable if I and its user can form an FMA instruction
10754 /// because we prefer FMSUB/FMADD.
10755 bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
10756  if (I->getOpcode() != Instruction::FMul)
10757  return true;
10758 
10759  if (!I->hasOneUse())
10760  return true;
10761 
10762  Instruction *User = I->user_back();
10763 
10764  if (User &&
10765  !(User->getOpcode() == Instruction::FSub ||
10766  User->getOpcode() == Instruction::FAdd))
10767  return true;
10768 
10769  const TargetOptions &Options = getTargetMachine().Options;
10770  const Function *F = I->getFunction();
10771  const DataLayout &DL = F->getParent()->getDataLayout();
10772  Type *Ty = User->getOperand(0)->getType();
10773 
10774  return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
10775  isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
10776  (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10777  Options.UnsafeFPMath));
10778 }
10779 
10780 // All 32-bit GPR operations implicitly zero the high-half of the corresponding
10781 // 64-bit GPR.
10782 bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
10783  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
10784  return false;
10785  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
10786  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
10787  return NumBits1 == 32 && NumBits2 == 64;
10788 }
10789 bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
10790  if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
10791  return false;
10792  unsigned NumBits1 = VT1.getSizeInBits();
10793  unsigned NumBits2 = VT2.getSizeInBits();
10794  return NumBits1 == 32 && NumBits2 == 64;
10795 }
10796 
10797 bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
10798  EVT VT1 = Val.getValueType();
10799  if (isZExtFree(VT1, VT2)) {
10800  return true;
10801  }
10802 
10803  if (Val.getOpcode() != ISD::LOAD)
10804  return false;
10805 
10806  // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
10807  return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
10808  VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
10809  VT1.getSizeInBits() <= 32);
10810 }
10811 
10812 bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
10813  if (isa<FPExtInst>(Ext))
10814  return false;
10815 
10816  // Vector types are not free.
10817  if (Ext->getType()->isVectorTy())
10818  return false;
10819 
10820  for (const Use &U : Ext->uses()) {
10821  // The extension is free if we can fold it with a left shift in an
10822  // addressing mode or an arithmetic operation: add, sub, and cmp.
10823 
10824  // Is there a shift?
10825  const Instruction *Instr = cast<Instruction>(U.getUser());
10826 
10827  // Is this a constant shift?
10828  switch (Instr->getOpcode()) {
10829  case Instruction::Shl:
10830  if (!isa<ConstantInt>(Instr->getOperand(1)))
10831  return false;
10832  break;
10833  case Instruction::GetElementPtr: {
10834  gep_type_iterator GTI = gep_type_begin(Instr);
10835  auto &DL = Ext->getModule()->getDataLayout();
10836  std::advance(GTI, U.getOperandNo()-1);
10837  Type *IdxTy = GTI.getIndexedType();
10838  // This extension will end up with a shift because of the scaling factor.
10839  // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
10840  // Get the shift amount based on the scaling factor:
10841  // log2(sizeof(IdxTy)) - log2(8).
10842  uint64_t ShiftAmt =
10843  countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
10844  // Is the constant foldable in the shift of the addressing mode?
10845  // I.e., shift amount is between 1 and 4 inclusive.
10846  if (ShiftAmt == 0 || ShiftAmt > 4)
10847  return false;
10848  break;
10849  }
10850  case Instruction::Trunc:
10851  // Check if this is a noop.
10852  // trunc(sext ty1 to ty2) to ty1.
10853  if (Instr->getType() == Ext->getOperand(0)->getType())
10854  continue;
10855  LLVM_FALLTHROUGH;
10856  default:
10857  return false;
10858  }
10859 
10860  // At this point we can use the bfm family, so this extension is free
10861  // for that use.
10862  }
10863  return true;
10864 }
10865 
10866 /// Check if both Op1 and Op2 are shufflevector extracts of either the lower
10867 /// or upper half of the vector elements.
10868 static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
10869  auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
10870  auto *FullTy = FullV->getType();
10871  auto *HalfTy = HalfV->getType();
10872  return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
10873  2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
10874  };
10875 
10876  auto extractHalf = [](Value *FullV, Value *HalfV) {
10877  auto *FullVT = cast<FixedVectorType>(FullV->getType());
10878  auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
10879  return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
10880  };
10881 
10882  ArrayRef<int> M1, M2;
10883  Value *S1Op1, *S2Op1;
10884  if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
10885  !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
10886  return false;
10887 
10888  // Check that the operands are half as wide as the result and we extract
10889  // half of the elements of the input vectors.
10890  if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
10891  !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
10892  return false;
10893 
10894  // Check the mask extracts either the lower or upper half of vector
10895  // elements.
10896  int M1Start = -1;
10897  int M2Start = -1;
10898  int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
10899  if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
10900  !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
10901  M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
10902  return false;
10903 
10904  return true;
10905 }
10906 
10907 /// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
10908 /// of the vector elements.
10909 static bool areExtractExts(Value *Ext1, Value *Ext2) {
10910  auto areExtDoubled = [](Instruction *Ext) {
10911  return Ext->getType()->getScalarSizeInBits() ==
10912  2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
10913  };
10914 
10915  if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
10916  !match(Ext2, m_ZExtOrSExt(m_Value())) ||
10917  !areExtDoubled(cast<Instruction>(Ext1)) ||
10918  !areExtDoubled(cast<Instruction>(Ext2)))
10919  return false;
10920 
10921  return true;
10922 }
10923 
10924 /// Check if Op could be used with vmull_high_p64 intrinsic.
10925 static bool isOperandOfVmullHighP64(Value *Op) {
10926  Value *VectorOperand = nullptr;
10927  ConstantInt *ElementIndex = nullptr;
10928  return match(Op, m_ExtractElt(m_Value(VectorOperand),
10929  m_ConstantInt(ElementIndex))) &&
10930  ElementIndex->getValue() == 1 &&
10931  isa<FixedVectorType>(VectorOperand->getType()) &&
10932  cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
10933 }
10934 
10935 /// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
10936 static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
10937  return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
10938 }
10939 
10940 /// Check if sinking \p I's operands to I's basic block is profitable, because
10941 /// the operands can be folded into a target instruction, e.g.
10942 /// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
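/// For example, when both operands of a sub are sign extends of the upper
/// halves of two v16i8 vectors, sinking the shuffles and extends next to the
/// sub lets instruction selection form a single ssubl2 instead of separate
/// extract, extend and subtract instructions.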
10943 bool AArch64TargetLowering::shouldSinkOperands(
10944  Instruction *I, SmallVectorImpl<Use *> &Ops) const {
10945  if (!I->getType()->isVectorTy())
10946  return false;
10947 
10948  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
10949  switch (II->getIntrinsicID()) {
10950  case Intrinsic::aarch64_neon_umull:
10951  if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
10952  return false;
10953  Ops.push_back(&II->getOperandUse(0));
10954  Ops.push_back(&II->getOperandUse(1));
10955  return true;
10956 
10957  case Intrinsic::aarch64_neon_pmull64:
10958  if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
10959  II->getArgOperand(1)))
10960  return false;
10961  Ops.push_back(&II->getArgOperandUse(0));
10962  Ops.push_back(&II->getArgOperandUse(1));
10963  return true;
10964 
10965  default:
10966  return false;
10967  }
10968  }
10969 
10970  switch (I->getOpcode()) {
10971  case Instruction::Sub:
10972  case Instruction::Add: {
10973  if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
10974  return false;
10975 
10976  // If the exts' operands extract either the lower or upper elements, we
10977  // can sink them too.
10978  auto Ext1 = cast<Instruction>(I->getOperand(0));
10979  auto Ext2 = cast<Instruction>(I->getOperand(1));
10980  if (areExtractShuffleVectors(Ext1, Ext2)) {
10981  Ops.push_back(&Ext1->getOperandUse(0));
10982  Ops.push_back(&Ext2->getOperandUse(0));
10983  }
10984 
10985  Ops.push_back(&I->getOperandUse(0));
10986  Ops.push_back(&I->getOperandUse(1));
10987 
10988  return true;
10989  }
10990  case Instruction::Mul: {
10991  bool IsProfitable = false;
10992  for (auto &Op : I->operands()) {
10993  // Make sure we are not already sinking this operand
10994  if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
10995  continue;
10996 
10997  ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
10998  if (!Shuffle || !Shuffle->isZeroEltSplat())
10999  continue;
11000 
11001  Value *ShuffleOperand = Shuffle->getOperand(0);
11002  InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
11003  if (!Insert)
11004  continue;
11005 
11006  Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
11007  if (!OperandInstr)
11008  continue;
11009 
11010  ConstantInt *ElementConstant =
11011  dyn_cast<ConstantInt>(Insert->getOperand(2));
11012  // Check that the insertelement is inserting into element 0
11013  if (!ElementConstant || ElementConstant->getZExtValue() != 0)
11014  continue;
11015 
11016  unsigned Opcode = OperandInstr->getOpcode();
11017  if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11018  continue;
11019 
11020  Ops.push_back(&Shuffle->getOperandUse(0));
11021  Ops.push_back(&Op);
11022  IsProfitable = true;
11023  }
11024 
11025  return IsProfitable;
11026  }
11027  default:
11028  return false;
11029  }
11030  return false;
11031 }
11032 
11033 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
11034  Align &RequiredAligment) const {
11035  if (!LoadedType.isSimple() ||
11036  (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
11037  return false;
11038  // Cyclone supports unaligned accesses.
11039  RequiredAligment = Align(1);
11040  unsigned NumBits = LoadedType.getSizeInBits();
11041  return NumBits == 32 || NumBits == 64;
11042 }
11043 
11044 /// A helper function for determining the number of interleaved accesses we
11045 /// will generate when lowering accesses of the given type.
11046 unsigned
11047 AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
11048  const DataLayout &DL) const {
11049  return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
11050 }
11051 
11052 MachineMemOperand::Flags
11053 AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
11054  if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
11055  I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
11056  return MOStridedAccess;
11057  return MachineMemOperand::MONone;
11058 }
11059 
11060 bool AArch64TargetLowering::isLegalInterleavedAccessType(
11061  VectorType *VecTy, const DataLayout &DL) const {
11062 
11063  unsigned VecSize = DL.getTypeSizeInBits(VecTy);
11064  unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
11065 
11066  // Ensure the number of vector elements is greater than 1.
11067  if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
11068  return false;
11069 
11070  // Ensure the element type is legal.
11071  if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
11072  return false;
11073 
11074  // Ensure the total vector size is 64 or a multiple of 128. Types larger than
11075  // 128 will be split into multiple interleaved accesses.
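  // For example, a 512-bit vector of i32 elements is accepted here and is
  // later emitted as four 128-bit ldN/stN operations, whereas a 96-bit vector
  // is rejected.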
11076  return VecSize == 64 || VecSize % 128 == 0;
11077 }
11078 
11079 /// Lower an interleaved load into a ldN intrinsic.
11080 ///
11081 /// E.g. Lower an interleaved load (Factor = 2):
11082 /// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
11083 /// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
11084 /// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
11085 ///
11086 /// Into:
11087 /// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
11088 /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
11089 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
11090 bool AArch64TargetLowering::lowerInterleavedLoad(
11091  LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
11092  ArrayRef<unsigned> Indices, unsigned Factor) const {
11093  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11094  "Invalid interleave factor");
11095  assert(!Shuffles.empty() && "Empty shufflevector input");
11096  assert(Shuffles.size() == Indices.size() &&
11097  "Unmatched number of shufflevectors and indices");
11098 
11099  const DataLayout &DL = LI->getModule()->getDataLayout();
11100 
11101  VectorType *VTy = Shuffles[0]->getType();
11102 
11103  // Skip if we do not have NEON and skip illegal vector types. We can
11104  // "legalize" wide vector types into multiple interleaved accesses as long as
11105  // the vector types are divisible by 128.
11106  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
11107  return false;
11108 
11109  unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
11110 
11111  auto *FVTy = cast<FixedVectorType>(VTy);
11112 
11113  // A pointer vector can not be the return type of the ldN intrinsics. Need to
11114  // load integer vectors first and then convert to pointer vectors.
11115  Type *EltTy = FVTy->getElementType();
11116  if (EltTy->isPointerTy())
11117  FVTy =
11118  FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
11119 
11120  IRBuilder<> Builder(LI);
11121 
11122  // The base address of the load.
11123  Value *BaseAddr = LI->getPointerOperand();
11124 
11125  if (NumLoads > 1) {
11126  // If we're going to generate more than one load, reset the sub-vector type
11127  // to something legal.
11128  FVTy = FixedVectorType::get(FVTy->getElementType(),
11129  FVTy->getNumElements() / NumLoads);
11130 
11131  // We will compute the pointer operand of each load from the original base
11132  // address using GEPs. Cast the base address to a pointer to the scalar
11133  // element type.
11134  BaseAddr = Builder.CreateBitCast(
11135  BaseAddr,
11136  FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
11137  }
11138 
11139  Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
11140  Type *Tys[2] = {FVTy, PtrTy};
11141  static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
11142  Intrinsic::aarch64_neon_ld3,
11143  Intrinsic::aarch64_neon_ld4};
11144  Function *LdNFunc =
11145  Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
11146 
11147  // Holds sub-vectors extracted from the load intrinsic return values. The
11148  // sub-vectors are associated with the shufflevector instructions they will
11149  // replace.
11150  DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
11151 
11152  for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
11153 
11154  // If we're generating more than one load, compute the base address of
11155  // subsequent loads as an offset from the previous.
11156  if (LoadCount > 0)
11157  BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
11158  FVTy->getNumElements() * Factor);
11159 
11160  CallInst *LdN = Builder.CreateCall(
11161  LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
11162 
11163  // Extract and store the sub-vectors returned by the load intrinsic.
11164  for (unsigned i = 0; i < Shuffles.size(); i++) {
11165  ShuffleVectorInst *SVI = Shuffles[i];
11166  unsigned Index = Indices[i];
11167 
11168  Value *SubVec = Builder.CreateExtractValue(LdN, Index);
11169 
11170  // Convert the integer vector to pointer vector if the element is pointer.
11171  if (EltTy->isPointerTy())
11172  SubVec = Builder.CreateIntToPtr(
11173  SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
11174  FVTy->getNumElements()));
11175  SubVecs[SVI].push_back(SubVec);
11176  }
11177  }
11178 
11179  // Replace uses of the shufflevector instructions with the sub-vectors
11180  // returned by the load intrinsic. If a shufflevector instruction is
11181  // associated with more than one sub-vector, those sub-vectors will be
11182  // concatenated into a single wide vector.
11183  for (ShuffleVectorInst *SVI : Shuffles) {
11184  auto &SubVec = SubVecs[SVI];
11185  auto *WideVec =
11186  SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
11187  SVI->replaceAllUsesWith(WideVec);
11188  }
11189 
11190  return true;
11191 }
11192 
11193 /// Lower an interleaved store into a stN intrinsic.
11194 ///
11195 /// E.g. Lower an interleaved store (Factor = 3):
11196 /// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
11197 /// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
11198 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
11199 ///
11200 /// Into:
11201 /// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
11202 /// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
11203 /// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
11204 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11205 ///
11206 /// Note that the new shufflevectors will be removed and we'll only generate one
11207 /// st3 instruction in CodeGen.
11208 ///
11209 /// Example for a more general valid mask (Factor 3). Lower:
11210 /// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
11211 /// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
11212 /// store <12 x i32> %i.vec, <12 x i32>* %ptr
11213 ///
11214 /// Into:
11215 /// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
11216 /// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
11217 /// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
11218 /// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11219 bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
11220  ShuffleVectorInst *SVI,
11221  unsigned Factor) const {
11222  assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11223  "Invalid interleave factor");
11224 
11225  auto *VecTy = cast<FixedVectorType>(SVI->getType());
11226  assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
11227 
11228  unsigned LaneLen = VecTy->getNumElements() / Factor;
11229  Type *EltTy = VecTy->getElementType();
11230  auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
11231 
11232  const DataLayout &DL = SI->getModule()->getDataLayout();
11233 
11234  // Skip if we do not have NEON and skip illegal vector types. We can
11235  // "legalize" wide vector types into multiple interleaved accesses as long as
11236  // the vector types are divisible by 128.
11237  if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
11238  return false;
11239 
11240  unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
11241 
11242  Value *Op0 = SVI->getOperand(0);
11243  Value *Op1 = SVI->getOperand(1);
11244  IRBuilder<> Builder(SI);
11245 
11246  // StN intrinsics don't support pointer vectors as arguments. Convert pointer
11247  // vectors to integer vectors.
11248  if (EltTy->isPointerTy()) {
11249  Type *IntTy = DL.getIntPtrType(EltTy);
11250  unsigned NumOpElts =
11251  cast<FixedVectorType>(Op0->getType())->getNumElements();
11252 
11253  // Convert to the corresponding integer vector.
11254  auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
11255  Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
11256  Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
11257 
11258  SubVecTy = FixedVectorType::get(IntTy, LaneLen);
11259  }
11260 
11261  // The base address of the store.
11262  Value *BaseAddr = SI->getPointerOperand();
11263 
11264  if (NumStores > 1) {
11265  // If we're going to generate more than one store, reset the lane length
11266  // and sub-vector type to something legal.
11267  LaneLen /= NumStores;
11268  SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
11269 
11270  // We will compute the pointer operand of each store from the original base
11271  // address using GEPs. Cast the base address to a pointer to the scalar
11272  // element type.
11273  BaseAddr = Builder.CreateBitCast(
11274  BaseAddr,
11275  SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
11276  }
11277 
11278  auto Mask = SVI->getShuffleMask();
11279 
11280  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
11281  Type *Tys[2] = {SubVecTy, PtrTy};
11282  static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
11283  Intrinsic::aarch64_neon_st3,
11284  Intrinsic::aarch64_neon_st4};
11285  Function *StNFunc =
11286  Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
11287 
11288  for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
11289 
11290  SmallVector<Value *, 5> Ops;
11291 
11292  // Split the shufflevector operands into sub vectors for the new stN call.
11293  for (unsigned i = 0; i < Factor; i++) {
11294  unsigned IdxI = StoreCount * LaneLen * Factor + i;
11295  if (Mask[IdxI] >= 0) {
11296  Ops.push_back(Builder.CreateShuffleVector(
11297  Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
11298  } else {
11299  unsigned StartMask = 0;
11300  for (unsigned j = 1; j < LaneLen; j++) {
11301  unsigned IdxJ = StoreCount * LaneLen * Factor + j;
11302  if (Mask[IdxJ * Factor + IdxI] >= 0) {
11303  StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
11304  break;
11305  }
11306  }
11307  // Note: Filling undef gaps with random elements is ok, since
11308  // those elements were being written anyway (with undefs).
11309  // In the case of all undefs we're defaulting to using elems from 0
11310  // Note: StartMask cannot be negative, it's checked in
11311  // isReInterleaveMask
11312  Ops.push_back(Builder.CreateShuffleVector(
11313  Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
11314  }
11315  }
11316 
11317  // If we're generating more than one store, compute the base address of
11318  // subsequent stores as an offset from the previous.
11319  if (StoreCount > 0)
11320  BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
11321  BaseAddr, LaneLen * Factor);
11322 
11323  Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
11324  Builder.CreateCall(StNFunc, Ops);
11325  }
11326  return true;
11327 }
11328 
11329 // Lower an SVE structured load intrinsic returning a tuple type to target
11330 // specific intrinsic taking the same input but returning a multi-result value
11331 // of the split tuple type.
11332 //
11333 // E.g. Lowering an LD3:
11334 //
11335 // call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
11336 // <vscale x 4 x i1> %pred,
11337 // <vscale x 4 x i32>* %addr)
11338 //
11339 // Output DAG:
11340 //
11341 // t0: ch = EntryToken
11342 // t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
11343 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
11344 // t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
11345 // t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
11346 //
11347 // This is called pre-legalization to avoid widening/splitting issues with
11348 // non-power-of-2 tuple types used for LD3, such as nxv12i32.
11349 SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
11350  ArrayRef<SDValue> LoadOps,
11351  EVT VT, SelectionDAG &DAG,
11352  const SDLoc &DL) const {
11353  assert(VT.isScalableVector() && "Can only lower scalable vectors");
11354 
11355  unsigned N, Opcode;
11356  static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
11357  {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
11358  {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
11359  {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
11360 
11361  std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
11362  assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
11363  "invalid tuple vector type!");
11364 
11365  EVT SplitVT =
11368  assert(isTypeLegal(SplitVT));
11369 
11370  SmallVector<EVT, 5> VTs(N, SplitVT);
11371  VTs.push_back(MVT::Other); // Chain
11372  SDVTList NodeTys = DAG.getVTList(VTs);
11373 
11374  SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
11375  SmallVector<SDValue, 4> PseudoLoadOps;
11376  for (unsigned I = 0; I < N; ++I)
11377  PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
11378  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
11379 }
11380 
11381 EVT AArch64TargetLowering::getOptimalMemOpType(
11382  const MemOp &Op, const AttributeList &FuncAttributes) const {
11383  bool CanImplicitFloat =
11384  !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
11385  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
11386  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
11387  // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
11388  // taken one instruction to materialize the v2i64 zero and one store (with
11389  // restrictive addressing mode). Just do i64 stores.
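  // For example, a 16-byte memset is typically emitted as two i64 stores,
  // while a 64-byte memset with 16-byte alignment can use v2i64 stores.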
11390  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
11391  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
11392  if (Op.isAligned(AlignCheck))
11393  return true;
11394  bool Fast;
11396  &Fast) &&
11397  Fast;
11398  };
11399 
11400  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
11401  AlignmentIsAcceptable(MVT::v2i64, Align(16)))
11402  return MVT::v2i64;
11403  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
11404  return MVT::f128;
11405  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
11406  return MVT::i64;
11407  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
11408  return MVT::i32;
11409  return MVT::Other;
11410 }
11411 
11413  const MemOp &Op, const AttributeList &FuncAttributes) const {
11414  bool CanImplicitFloat =
11415  !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat);
11416  bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
11417  bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
11418  // Only use AdvSIMD to implement memsets of 32 bytes and above. It would have
11419  // taken one instruction to materialize the v2i64 zero and one store (with
11420  // restrictive addressing mode). Just do i64 stores.
11421  bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
11422  auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
11423  if (Op.isAligned(AlignCheck))
11424  return true;
11425  bool Fast;
11427  &Fast) &&
11428  Fast;
11429  };
11430 
11431  if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
11432  AlignmentIsAcceptable(MVT::v2i64, Align(16)))
11433  return LLT::vector(2, 64);
11434  if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
11435  return LLT::scalar(128);
11436  if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
11437  return LLT::scalar(64);
11438  if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
11439  return LLT::scalar(32);
11440  return LLT();
11441 }
11442 
11443 // 12-bit optionally shifted immediates are legal for adds.
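// For example, add immediates such as 0xfff (12 bits) or 0xabc000 (12 bits
// shifted left by 12) are legal, while 0x1001 is not and has to be
// materialized into a register first.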
11444 bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
11445  if (Immed == std::numeric_limits<int64_t>::min()) {
11446  LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
11447  << ": avoid UB for INT64_MIN\n");
11448  return false;
11449  }
11450  // Same encoding for add/sub, just flip the sign.
11451  Immed = std::abs(Immed);
11452  bool IsLegal = ((Immed >> 12) == 0 ||
11453  ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
11454  LLVM_DEBUG(dbgs() << "Is " << Immed
11455  << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
11456  return IsLegal;
11457 }
11458 
11459 // Integer comparisons are implemented with ADDS/SUBS, so the range of valid
11460 // immediates is the same as for an add or a sub.
11461 bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
11462  return isLegalAddImmediate(Immed);
11463 }
11464 
11465 /// isLegalAddressingMode - Return true if the addressing mode represented
11466 /// by AM is legal for this target, for a load/store of the specified type.
11467 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
11468  const AddrMode &AM, Type *Ty,
11469  unsigned AS, Instruction *I) const {
11470  // AArch64 has five basic addressing modes:
11471  // reg
11472  // reg + 9-bit signed offset
11473  // reg + SIZE_IN_BYTES * 12-bit unsigned offset
11474  // reg1 + reg2
11475  // reg + SIZE_IN_BYTES * reg
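 // For example (operands illustrative only):
 //   ldr w0, [x1]               reg
 //   ldur w0, [x1, #-8]         reg + 9-bit signed offset
 //   ldr w0, [x1, #4092]        reg + 4 * 12-bit unsigned offset
 //   ldr w0, [x1, x2]           reg1 + reg2
 //   ldr w0, [x1, x2, lsl #2]   reg1 + 4 * reg2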
11476 
11477  // No global is ever allowed as a base.
11478  if (AM.BaseGV)
11479  return false;
11480 
11481  // No reg+reg+imm addressing.
11482  if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
11483  return false;
11484 
11485  // FIXME: Update this method to support scalable addressing modes.
11486  if (isa<ScalableVectorType>(Ty))
11487  return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
11488 
11489  // check reg + imm case:
11490  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
11491  uint64_t NumBytes = 0;
11492  if (Ty->isSized()) {
11493  uint64_t NumBits = DL.getTypeSizeInBits(Ty);
11494  NumBytes = NumBits / 8;
11495  if (!isPowerOf2_64(NumBits))
11496  NumBytes = 0;
11497  }
11498 
11499  if (!AM.Scale) {
11500  int64_t Offset = AM.BaseOffs;
11501 
11502  // 9-bit signed offset
11503  if (isInt<9>(Offset))
11504  return true;
11505 
11506  // 12-bit unsigned offset
11507  unsigned shift = Log2_64(NumBytes);
11508  if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11509  // Must be a multiple of NumBytes (NumBytes is a power of 2)
11510  (Offset >> shift) << shift == Offset)
11511  return true;
11512  return false;
11513  }
11514 
11515  // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11516 
11517  return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
11518 }
11519 
11520 bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
11521  // Consider splitting large offset of struct or array.
11522  return true;
11523 }
11524 
11525 int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
11526  const AddrMode &AM, Type *Ty,
11527  unsigned AS) const {
11528  // Scaling factors are not free at all.
11529  // Operands | Rt Latency
11530  // -------------------------------------------
11531  // Rt, [Xn, Xm] | 4
11532  // -------------------------------------------
11533  // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
11534  // Rt, [Xn, Wm, <extend> #imm] |
11535  if (isLegalAddressingMode(DL, AM, Ty, AS))
11536  // Scale represents reg2 * scale, thus account for 1 if
11537  // it is not equal to 0 or 1.
11538  return AM.Scale != 0 && AM.Scale != 1;
11539  return -1;
11540 }
11541 
11542 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
11543  const MachineFunction &MF, EVT VT) const {
11544  VT = VT.getScalarType();
11545 
11546  if (!VT.isSimple())
11547  return false;
11548 
11549  switch (VT.getSimpleVT().SimpleTy) {
11550  case MVT::f32:
11551  case MVT::f64:
11552  return true;
11553  default:
11554  break;
11555  }
11556 
11557  return false;
11558 }
11559 
11560 bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
11561  Type *Ty) const {
11562  switch (Ty->getScalarType()->getTypeID()) {
11563  case Type::FloatTyID:
11564  case Type::DoubleTyID:
11565  return true;
11566  default:
11567  return false;
11568  }
11569 }
11570 
11571 const MCPhysReg *
11572 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
11573  // LR is a callee-save register, but we must treat it as clobbered by any call
11574  // site. Hence we include LR in the scratch registers, which are in turn added
11575  // as implicit-defs for stackmaps and patchpoints.
11576  static const MCPhysReg ScratchRegs[] = {
11577  AArch64::X16, AArch64::X17, AArch64::LR, 0
11578  };
11579  return ScratchRegs;
11580 }
11581 
11582 bool
11583 AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
11584  CombineLevel Level) const {
11585  N = N->getOperand(0).getNode();
11586  EVT VT = N->getValueType(0);
11587  // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
11588  // it with shift to let it be lowered to UBFX.
11589  if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
11590  isa<ConstantSDNode>(N->getOperand(1))) {
11591  uint64_t TruncMask = N->getConstantOperandVal(1);
11592  if (isMask_64(TruncMask) &&
11593  N->getOperand(0).getOpcode() == ISD::SRL &&
11594  isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
11595  return false;
11596  }
11597  return true;
11598 }
11599 
11600 bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
11601  Type *Ty) const {
11602  assert(Ty->isIntegerTy());
11603 
11604  unsigned BitSize = Ty->getPrimitiveSizeInBits();
11605  if (BitSize == 0)
11606  return false;
11607 
11608  int64_t Val = Imm.getSExtValue();
11609  if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
11610  return true;
11611 
11612  if ((int64_t)Val < 0)
11613  Val = ~Val;
11614  if (BitSize == 32)
11615  Val &= (1LL << 32) - 1;
11616 
11617  unsigned LZ = countLeadingZeros((uint64_t)Val);
11618  unsigned Shift = (63 - LZ) / 16;
11619  // MOVZ is free so return true for one or fewer MOVK.
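  // For example, 0x12340000 needs only a single MOVZ (shift 16), whereas a
  // constant such as 0x123456789abcdef0 needs a MOVZ plus three MOVKs and is
  // better left as a load.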
11620  return Shift < 3;
11621 }
11622 
11623 bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
11624  unsigned Index) const {
11625  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
11626  return false;
11627 
11628  return (Index == 0 || Index == ResVT.getVectorNumElements());
11629 }
11630 
11631 /// Turn vector tests of the signbit in the form of:
11632 /// xor (sra X, elt_size(X)-1), -1
11633 /// into:
11634 /// cmge X, X, #0
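/// The arithmetic shift smears the sign bit across each lane (all-ones for
/// negative elements, all-zeros otherwise), so NOT of that mask is exactly
/// "element >= 0", which CMGE against #0 computes directly.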
11635 static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
11636  const AArch64Subtarget *Subtarget) {
11637  EVT VT = N->getValueType(0);
11638  if (!Subtarget->hasNEON() || !VT.isVector())
11639  return SDValue();
11640 
11641  // There must be a shift right algebraic before the xor, and the xor must be a
11642  // 'not' operation.
11643  SDValue Shift = N->getOperand(0);
11644  SDValue Ones = N->getOperand(1);
11645  if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
11646  !ISD::isBuildVectorAllOnes(Ones.getNode()))
11647  return SDValue();
11648 
11649  // The shift should be smearing the sign bit across each vector element.
11650  auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
11651  EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
11652  if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
11653  return SDValue();
11654 
11655  return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
11656 }
11657 
11658 // VECREDUCE_ADD( EXTEND(v16i8_type) ) to
11659 // VECREDUCE_ADD( DOTv16i8(v16i8_type) )
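// A dot product against an all-ones vector sums each group of four bytes into
// one i32 lane, so the reduction over sixteen extended bytes becomes a cheap
// v4i32 add reduction; the selected code looks roughly like:
//   movi v1.16b, #1
//   movi v2.4s, #0
//   udot v2.4s, v0.16b, v1.16b
//   addv s0, v2.4s
// (udot for zero-extends, sdot for sign-extends; registers illustrative only).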
11660 static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
11661  const AArch64Subtarget *ST) {
11662  SDValue Op0 = N->getOperand(0);
11663  if (!ST->hasDotProd() || N->getValueType(0) != MVT::i32)
11664  return SDValue();
11665 
11666  if (Op0.getValueType().getVectorElementType() != MVT::i32)
11667  return SDValue();
11668 
11669  unsigned ExtOpcode = Op0.getOpcode();
11670  if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
11671  return SDValue();
11672 
11673  EVT Op0VT = Op0.getOperand(0).getValueType();
11674  if (Op0VT != MVT::v16i8)
11675  return SDValue();
11676 
11677  SDLoc DL(Op0);
11678  SDValue Ones = DAG.getConstant(1, DL, Op0VT);
11679  SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
11680  auto DotIntrisic = (ExtOpcode == ISD::ZERO_EXTEND)
11681  ? Intrinsic::aarch64_neon_udot
11682  : Intrinsic::aarch64_neon_sdot;
11683  SDValue Dot = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Zeros.getValueType(),
11684  DAG.getConstant(DotIntrisic, DL, MVT::i32), Zeros,
11685  Ones, Op0.getOperand(0));
11686  return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
11687 }
11688 
11689 // Given an ABS node, detect the following pattern:
11690 // (ABS (SUB (EXTEND a), (EXTEND b))).
11691 // Generates UABD/SABD instruction.
11692 static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG,
11693  TargetLowering::DAGCombinerInfo &DCI,
11694  const AArch64Subtarget *Subtarget) {
11695  SDValue AbsOp1 = N->getOperand(0);
11696  SDValue Op0, Op1;
11697 
11698  if (AbsOp1.getOpcode() != ISD::SUB)
11699  return SDValue();
11700 
11701  Op0 = AbsOp1.getOperand(0);
11702  Op1 = AbsOp1.getOperand(1);
11703 
11704  unsigned Opc0 = Op0.getOpcode();
11705  // Check if the operands of the sub are (zero|sign)-extended.
11706  if (Opc0 != Op1.getOpcode() ||
11707  (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND))
11708  return SDValue();
11709 
11710  EVT VectorT1 = Op0.getOperand(0).getValueType();
11711  EVT VectorT2 = Op1.getOperand(0).getValueType();
11712  // Check if vectors are of same type and valid size.
11713  uint64_t Size = VectorT1.getFixedSizeInBits();
11714  if (VectorT1 != VectorT2 || (Size != 64 && Size != 128))
11715  return SDValue();
11716 
11717  // Check if vector element types are valid.
11718  EVT VT1 = VectorT1.getVectorElementType();
11719  if (VT1 != MVT::i8 && VT1 != MVT::i16 && VT1 != MVT::i32)
11720  return SDValue();
11721 
11722  Op0 = Op0.getOperand(0);
11723  Op1 = Op1.getOperand(0);
11724  unsigned ABDOpcode =
11725  (Opc0 == ISD::SIGN_EXTEND) ? AArch64ISD::SABD : AArch64ISD::UABD;
11726  SDValue ABD =
11727  DAG.getNode(ABDOpcode, SDLoc(N), Op0->getValueType(0), Op0, Op1);
11728  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), ABD);
11729 }
11730 
11731 static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
11732  TargetLowering::DAGCombinerInfo &DCI,
11733  const AArch64Subtarget *Subtarget) {
11734  if (DCI.isBeforeLegalizeOps())
11735  return SDValue();
11736 
11737  return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
11738 }
11739 
11740 SDValue
11741 AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
11742  SelectionDAG &DAG,
11743  SmallVectorImpl<SDNode *> &Created) const {
11744  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
11745  if (isIntDivCheap(N->getValueType(0), Attr))
11746  return SDValue(N,0); // Lower SDIV as SDIV
11747 
11748  // fold (sdiv X, pow2)
11749  EVT VT = N->getValueType(0);
11750  if ((VT != MVT::i32 && VT != MVT::i64) ||
11751  !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
11752  return SDValue();
11753 
11754  SDLoc DL(N);
11755  SDValue N0 = N->getOperand(0);
11756  unsigned Lg2 = Divisor.countTrailingZeros();
11757  SDValue Zero = DAG.getConstant(0, DL, VT);
11758  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
11759 
11760  // Add (N0 < 0) ? Pow2 - 1 : 0;
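  // For example, for an i32 division by 4 a plain arithmetic shift would turn
  // -7 into -2, but (-7 + 3) >> 2 = -1, which matches the round-toward-zero
  // semantics of signed division.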
11761  SDValue CCVal;
11762  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
11763  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
11764  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
11765 
11766  Created.push_back(Cmp.getNode());
11767  Created.push_back(Add.getNode());
11768  Created.push_back(CSel.getNode());
11769 
11770  // Divide by pow2.
11771  SDValue SRA =
11772  DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
11773 
11774  // If we're dividing by a positive value, we're done. Otherwise, we must
11775  // negate the result.
11776  if (Divisor.isNonNegative())
11777  return SRA;
11778 
11779  Created.push_back(SRA.getNode());
11780  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
11781 }
11782 
11783 static bool IsSVECntIntrinsic(SDValue S) {
11784  switch(getIntrinsicID(S.getNode())) {
11785  default:
11786  break;
11787  case Intrinsic::aarch64_sve_cntb:
11788  case Intrinsic::aarch64_sve_cnth:
11789  case Intrinsic::aarch64_sve_cntw:
11790  case Intrinsic::aarch64_sve_cntd:
11791  return true;
11792  }
11793  return false;
11794 }
11795 
11796 /// Calculates what the pre-extend type is, based on the extension
11797 /// operation node provided by \p Extend.
11798 ///
11799 /// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
11800 /// pre-extend type is pulled directly from the operand, while other extend
11801 /// operations need a bit more inspection to get this information.
11802 ///
11803 /// \param Extend The SDNode from the DAG that represents the extend operation
11804 /// \param DAG The SelectionDAG hosting the \p Extend node
11805 ///
11806 /// \returns The type representing the \p Extend source type, or \p MVT::Other
11807 /// if no valid type can be determined
11808 static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
11809  switch (Extend.getOpcode()) {
11810  case ISD::SIGN_EXTEND:
11811  case ISD::ZERO_EXTEND:
11812  return Extend.getOperand(0).getValueType();
11813  case ISD::AssertSext:
11814  case ISD::AssertZext:
11815  case ISD::SIGN_EXTEND_INREG: {
11816  VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
11817  if (!TypeNode)
11818  return MVT::Other;
11819  return TypeNode->getVT();
11820  }
11821  case ISD::AND: {
11822  ConstantSDNode *Constant =
11823  dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
11824  if (!Constant)
11825  return MVT::Other;
11826 
11827  uint32_t Mask = Constant->getZExtValue();
11828 
11829  if (Mask == UCHAR_MAX)
11830  return MVT::i8;
11831  else if (Mask == USHRT_MAX)
11832  return MVT::i16;
11833  else if (Mask == UINT_MAX)
11834  return MVT::i32;
11835 
11836  return MVT::Other;
11837  }
11838  default:
11839  return MVT::Other;
11840  }
11841 
11842  llvm_unreachable("Code path unhandled in calculatePreExtendType!");
11843 }
11844 
11845 /// Combines a dup(sext/zext) node pattern into sext/zext(dup)
11846 /// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
11847 static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
11848  SelectionDAG &DAG) {
11849 
11850  ShuffleVectorSDNode *ShuffleNode =
11851  dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
11852  if (!ShuffleNode)
11853  return SDValue();
11854 
11855  // Ensure the shuffle mask is a splat of lane zero before continuing
11856  if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
11857  return SDValue();
11858 
11859  SDValue InsertVectorElt = VectorShuffle.getOperand(0);
11860 
11861  if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
11862  return SDValue();
11863 
11864  SDValue InsertLane = InsertVectorElt.getOperand(2);
11865  ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
11866  // Ensures the insert is inserting into lane 0
11867  if (!Constant || Constant->getZExtValue() != 0)
11868  return SDValue();
11869 
11870  SDValue Extend = InsertVectorElt.getOperand(1);
11871  unsigned ExtendOpcode = Extend.getOpcode();
11872 
11873  bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
11874  ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
11875  ExtendOpcode == ISD::AssertSext;
11876  if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
11877  ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
11878  return SDValue();
11879 
11880  EVT TargetType = VectorShuffle.getValueType();
11881  EVT PreExtendType = calculatePreExtendType(Extend, DAG);
11882 
11883  if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
11884  TargetType != MVT::v2i64) ||
11885  (PreExtendType == MVT::Other))
11886  return SDValue();
11887 
11888  // Restrict valid pre-extend data type
11889  if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
11890  PreExtendType != MVT::i32)
11891  return SDValue();
11892 
11893  EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
11894 
11895  if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
11896  return SDValue();
11897 
11898  if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
11899  return SDValue();
11900 
11901  SDLoc DL(VectorShuffle);
11902 
11903  SDValue InsertVectorNode = DAG.getNode(
11904  InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
11905  DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
11906  DAG.getConstant(0, DL, MVT::i64));
11907 
11908  std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
11909 
11910  SDValue VectorShuffleNode =
11911  DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
11912  DAG.getUNDEF(PreExtendVT), ShuffleMask);
11913 
11914  SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
11915  DL, TargetType, VectorShuffleNode);
11916 
11917  return ExtendNode;
11918 }
11919 
11920 /// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
11921 /// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
11922 static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
11923  // If the value type isn't a vector, none of the operands are going to be dups
11924  if (!Mul->getValueType(0).isVector())
11925  return SDValue();
11926 
11927  SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
11928  SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
11929 
11930  // Neither operand has been changed, don't make any further changes
11931  if (!Op0 && !Op1)
11932  return SDValue();
11933 
11934  SDLoc DL(Mul);
11935  return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
11936  Op0 ? Op0 : Mul->getOperand(0),
11937  Op1 ? Op1 : Mul->getOperand(1));
11938 }
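// For instance, assuming the shapes checked above, a v4i32 multiply whose
// operand is a splat of an extended scalar is rewritten roughly as
//   mul (v4i32 dup (sext i16 s to i32)), y
//     --> mul (sext (v4i16 dup s) to v4i32), y
// so the extension becomes a vector extend, which later combines and
// selection can fold into widening multiplies such as smull/umull.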
11939 
11940 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
11941  TargetLowering::DAGCombinerInfo &DCI,
11942  const AArch64Subtarget *Subtarget) {
11943 
11944  if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
11945  return Ext;
11946 
11947  if (DCI.isBeforeLegalizeOps())
11948  return SDValue();
11949 
11950  // The below optimizations require a constant RHS.
11951  if (!isa<ConstantSDNode>(N->getOperand(1)))
11952  return SDValue();
11953 
11954  SDValue N0 = N->getOperand(0);
11955  ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
11956  const APInt &ConstValue = C->getAPIntValue();
11957 
11958  // Allow the scaling to be folded into the `cnt` instruction by preventing
11959  // the scaling to be obscured here. This makes it easier to pattern match.
11960  if (IsSVECntIntrinsic(N0) ||
11961  (N0->getOpcode() == ISD::TRUNCATE &&
11962  (IsSVECntIntrinsic(N0->getOperand(0)))))
11963  if (ConstValue.sge(1) && ConstValue.sle(16))
11964  return SDValue();
11965 
11966  // Multiplication of a power of two plus/minus one can be done more
11967  // cheaply as a shift+add/sub. For now, this is true unilaterally. If
11968  // future CPUs have a cheaper MADD instruction, this may need to be
11969  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
11970  // 64-bit is 5 cycles, so this is always a win.
11971  // More aggressively, some multiplications N0 * C can be lowered to
11972  // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
11973  // e.g. 6=3*2=(2+1)*2.
11974  // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
11975  // which equals (1+2)*16-(1+2).
11976  // TrailingZeroes is used to test if the mul can be lowered to
11977  // shift+add+shift.
11978  unsigned TrailingZeroes = ConstValue.countTrailingZeros();
11979  if (TrailingZeroes) {
11980  // Conservatively do not lower to shift+add+shift if the mul might be
11981  // folded into smul or umul.
11982  if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
11983  isZeroExtended(N0.getNode(), DAG)))
11984  return SDValue();
11985  // Conservatively do not lower to shift+add+shift if the mul might be
11986  // folded into madd or msub.
11987  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
11988  N->use_begin()->getOpcode() == ISD::SUB))
11989  return SDValue();
11990  }
11991  // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
11992  // and shift+add+shift.
11993  APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
11994 
11995  unsigned ShiftAmt, AddSubOpc;
11996  // Is the shifted value the LHS operand of the add/sub?
11997  bool ShiftValUseIsN0 = true;
11998  // Do we need to negate the result?
11999  bool NegateResult = false;
12000 
12001  if (ConstValue.isNonNegative()) {
12002  // (mul x, 2^N + 1) => (add (shl x, N), x)
12003  // (mul x, 2^N - 1) => (sub (shl x, N), x)
12004  // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
12005  APInt SCVMinus1 = ShiftedConstValue - 1;
12006  APInt CVPlus1 = ConstValue + 1;
12007  if (SCVMinus1.isPowerOf2()) {
12008  ShiftAmt = SCVMinus1.logBase2();
12009  AddSubOpc = ISD::ADD;
12010  } else if (CVPlus1.isPowerOf2()) {
12011  ShiftAmt = CVPlus1.logBase2();
12012  AddSubOpc = ISD::SUB;
12013  } else
12014  return SDValue();
12015  } else {
12016  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12017  // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12018  APInt CVNegPlus1 = -ConstValue + 1;
12019  APInt CVNegMinus1 = -ConstValue - 1;
12020  if (CVNegPlus1.isPowerOf2()) {
12021  ShiftAmt = CVNegPlus1.logBase2();
12022  AddSubOpc = ISD::SUB;
12023  ShiftValUseIsN0 = false;
12024  } else if (CVNegMinus1.isPowerOf2()) {
12025  ShiftAmt = CVNegMinus1.logBase2();
12026  AddSubOpc = ISD::ADD;
12027  NegateResult = true;
12028  } else
12029  return SDValue();
12030  }
12031 
12032  SDLoc DL(N);
12033  EVT VT = N->getValueType(0);
12034  SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
12035  DAG.getConstant(ShiftAmt, DL, MVT::i64));
12036 
12037  SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
12038  SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
12039  SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
12040  assert(!(NegateResult && TrailingZeroes) &&
12041  "NegateResult and TrailingZeroes cannot both be true for now.");
12042  // Negate the result.
12043  if (NegateResult)
12044  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
12045  // Shift the result.
12046  if (TrailingZeroes)
12047  return DAG.getNode(ISD::SHL, DL, VT, Res,
12048  DAG.getConstant(TrailingZeroes, DL, MVT::i64));
12049  return Res;
12050 }
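// Worked examples of the paths above: C = 6 has one trailing zero, so
// ShiftedConstValue = 3 and the shift+add+shift form is used:
//   mul x, 6  -->  shl (add (shl x, 1), x), 1     // (x*3) << 1
// whereas C = 7 has no trailing zeros and CVPlus1 = 8 is a power of two:
//   mul x, 7  -->  sub (shl x, 3), x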
12051 
12052 static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
12053  SelectionDAG &DAG) {
12054  // Take advantage of vector comparisons producing 0 or -1 in each lane to
12055  // optimize away operation when it's from a constant.
12056  //
12057  // The general transformation is:
12058  // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
12059  // AND(VECTOR_CMP(x,y), constant2)
12060  // constant2 = UNARYOP(constant)
12061 
12062  // Early exit if this isn't a vector operation, the operand of the
12063  // unary operation isn't a bitwise AND, or if the sizes of the operations
12064  // aren't the same.
12065  EVT VT = N->getValueType(0);
12066  if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
12067  N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
12068  VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
12069  return SDValue();
12070 
12071  // Now check that the other operand of the AND is a constant. We could
12072  // make the transformation for non-constant splats as well, but it's unclear
12073  // that would be a benefit as it would not eliminate any operations, just
12074  // perform one more step in scalar code before moving to the vector unit.
12075  if (BuildVectorSDNode *BV =
12076  dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
12077  // Bail out if the vector isn't a constant.
12078  if (!BV->isConstant())
12079  return SDValue();
12080 
12081  // Everything checks out. Build up the new and improved node.
12082  SDLoc DL(N);
12083  EVT IntVT = BV->getValueType(0);
12084  // Create a new constant of the appropriate type for the transformed
12085  // DAG.
12086  SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
12087  // The AND node needs bitcasts to/from an integer vector type around it.
12088  SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
12089  SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
12090  N->getOperand(0)->getOperand(0), MaskConst);
12091  SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
12092  return Res;
12093  }
12094 
12095  return SDValue();
12096 }
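// Concretely, for a v4i32 mask this rewrites e.g.
//   sint_to_fp (and (setcc ...), (build_vector 1, 1, 1, 1))
// into
//   bitcast (and (setcc ...), (bitcast (build_vector 1.0f, 1.0f, 1.0f, 1.0f)))
// which is valid because every lane of the compare is all-zeros or all-ones,
// so converting the constant first and masking afterwards gives the same
// lanes as masking first and converting afterwards.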
12097 
12098 static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
12099  const AArch64Subtarget *Subtarget) {
12100  // First try to optimize away the conversion when it's conditionally from
12101  // a constant. Vectors only.
12102  if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
12103  return Res;
12104 
12105  EVT VT = N->getValueType(0);
12106  if (VT != MVT::f32 && VT != MVT::f64)
12107  return SDValue();
12108 
12109  // Only optimize when the source and destination types have the same width.
12110  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
12111  return SDValue();
12112 
12113  // If the result of an integer load is only used by an integer-to-float
12114  // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
12115  // This eliminates an "integer-to-vector-move" UOP and improves throughput.
12116  SDValue N0 = N->getOperand(0);
12117  if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12118  // Do not change the width of a volatile load.
12119  !cast<LoadSDNode>(N0)->isVolatile()) {
12120  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12121  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12122  LN0->getPointerInfo(), LN0->getAlignment(),
12123  LN0->getMemOperand()->getFlags());
12124 
12125  // Make sure successors of the original load stay after it by updating them
12126  // to use the new Chain.
12127  DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
12128 
12129  unsigned Opcode =
12130  (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
12131  return DAG.getNode(Opcode, SDLoc(N), VT, Load);
12132  }
12133 
12134  return SDValue();
12135 }
12136 
12137 /// Fold a floating-point multiply by power of two into floating-point to
12138 /// fixed-point conversion.
12139 static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
12140  TargetLowering::DAGCombinerInfo &DCI,
12141  const AArch64Subtarget *Subtarget) {
12142  if (!Subtarget->hasNEON())
12143  return SDValue();
12144 
12145  if (!N->getValueType(0).isSimple())
12146  return SDValue();
12147 
12148  SDValue Op = N->getOperand(0);
12149  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12150  Op.getOpcode() != ISD::FMUL)
12151  return SDValue();
12152 
12153  SDValue ConstVec = Op->getOperand(1);
12154  if (!isa<BuildVectorSDNode>(ConstVec))
12155  return SDValue();
12156 
12157  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12158  uint32_t FloatBits = FloatTy.getSizeInBits();
12159  if (FloatBits != 32 && FloatBits != 64)
12160  return SDValue();
12161 
12162  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12163  uint32_t IntBits = IntTy.getSizeInBits();
12164  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12165  return SDValue();
12166 
12167  // Avoid conversions where iN is larger than the float (e.g., float -> i64).
12168  if (IntBits > FloatBits)
12169  return SDValue();
12170 
12171  BitVector UndefElements;
12172  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12173  int32_t Bits = IntBits == 64 ? 64 : 32;
12174  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
12175  if (C == -1 || C == 0 || C > Bits)
12176  return SDValue();
12177 
12178  MVT ResTy;
12179  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12180  switch (NumLanes) {
12181  default:
12182  return SDValue();
12183  case 2:
12184  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
12185  break;
12186  case 4:
12187  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
12188  break;
12189  }
12190 
12191  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
12192  return SDValue();
12193 
12194  assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
12195  "Illegal vector type after legalization");
12196 
12197  SDLoc DL(N);
12198  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
12199  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
12200  : Intrinsic::aarch64_neon_vcvtfp2fxu;
12201  SDValue FixConv =
12202  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
12203  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
12204  Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
12205  // We can handle smaller integers by generating an extra trunc.
12206  if (IntBits < FloatBits)
12207  FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
12208 
12209  return FixConv;
12210 }
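// Illustrative instance of the fold above: with a splat of 8.0 (== 2^3),
//   fp_to_sint (fmul v4f32 x, splat 8.0)
// becomes the fixed-point conversion intrinsic with 3 fractional bits,
//   aarch64_neon_vcvtfp2fxs(x, 3)     // selects to fcvtzs ..., #3
// so the power-of-two scaling is absorbed into the conversion.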
12211 
12212 /// Fold a floating-point divide by power of two into fixed-point to
12213 /// floating-point conversion.
12214 static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
12215  TargetLowering::DAGCombinerInfo &DCI,
12216  const AArch64Subtarget *Subtarget) {
12217  if (!Subtarget->hasNEON())
12218  return SDValue();
12219 
12220  SDValue Op = N->getOperand(0);
12221  unsigned Opc = Op->getOpcode();
12222  if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12223  !Op.getOperand(0).getValueType().isSimple() ||
12224  (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
12225  return SDValue();
12226 
12227  SDValue ConstVec = N->getOperand(1);
12228  if (!isa<BuildVectorSDNode>(ConstVec))
12229  return SDValue();
12230 
12231  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
12232  int32_t IntBits = IntTy.getSizeInBits();
12233  if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12234  return SDValue();
12235 
12236  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
12237  int32_t FloatBits = FloatTy.getSizeInBits();
12238  if (FloatBits != 32 && FloatBits != 64)
12239  return SDValue();
12240 
12241  // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
12242  if (IntBits > FloatBits)
12243  return SDValue();
12244 
12245  BitVector UndefElements;
12246  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12247  int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
12248  if (C == -1 || C == 0 || C > FloatBits)
12249  return SDValue();
12250 
12251  MVT ResTy;
12252  unsigned NumLanes = Op.getValueType().getVectorNumElements();
12253  switch (NumLanes) {
12254  default:
12255  return SDValue();
12256  case 2:
12257  ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
12258  break;
12259  case 4:
12260  ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
12261  break;
12262  }
12263 
12264  if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
12265  return SDValue();
12266 
12267  SDLoc DL(N);
12268  SDValue ConvInput = Op.getOperand(0);
12269  bool IsSigned = Opc == ISD::SINT_TO_FP;
12270  if (IntBits < FloatBits)
12271  ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
12272  ResTy, ConvInput);
12273 
12274  unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
12275  : Intrinsic::aarch64_neon_vcvtfxu2fp;
12276  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
12277  DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
12278  DAG.getConstant(C, DL, MVT::i32));
12279 }
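// Mirror image of the previous fold, e.g. with a splat of 8.0 (== 2^3):
//   fdiv (sint_to_fp v4i32 x), splat 8.0
// becomes
//   aarch64_neon_vcvtfxs2fp(x, 3)     // selects to scvtf ..., #3
// folding the divide by a power of two into the fixed-point to
// floating-point conversion.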
12280 
12281 /// An EXTR instruction is made up of two shifts, ORed together. This helper
12282 /// searches for and classifies those shifts.
12283 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
12284  bool &FromHi) {
12285  if (N.getOpcode() == ISD::SHL)
12286  FromHi = false;
12287  else if (N.getOpcode() == ISD::SRL)
12288  FromHi = true;
12289  else
12290  return false;
12291 
12292  if (!isa<ConstantSDNode>(N.getOperand(1)))
12293  return false;
12294 
12295  ShiftAmount = N->getConstantOperandVal(1);
12296  Src = N->getOperand(0);
12297  return true;
12298 }
12299 
12300 /// EXTR instruction extracts a contiguous chunk of bits from two existing
12301 /// registers viewed as a high/low pair. This function looks for the pattern:
12302 /// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
12303 /// with an EXTR. Can't quite be done in TableGen because the two immediates
12304 /// aren't independent.
12305 static SDValue tryCombineToEXTR(SDNode *N,
12306  TargetLowering::DAGCombinerInfo &DCI) {
12307  SelectionDAG &DAG = DCI.DAG;
12308  SDLoc DL(N);
12309  EVT VT = N->getValueType(0);
12310 
12311  assert(N->getOpcode() == ISD::OR && "Unexpected root");
12312 
12313  if (VT != MVT::i32 && VT != MVT::i64)
12314  return SDValue();
12315 
12316  SDValue LHS;
12317  uint32_t ShiftLHS = 0;
12318  bool LHSFromHi = false;
12319  if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
12320  return SDValue();
12321 
12322  SDValue RHS;
12323  uint32_t ShiftRHS = 0;
12324  bool RHSFromHi = false;
12325  if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
12326  return SDValue();
12327 
12328  // If they're both trying to come from the high part of the register, they're
12329  // not really an EXTR.
12330  if (LHSFromHi == RHSFromHi)
12331  return SDValue();
12332 
12333  if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
12334  return SDValue();
12335 
12336  if (LHSFromHi) {
12337  std::swap(LHS, RHS);
12338  std::swap(ShiftLHS, ShiftRHS);
12339  }
12340 
12341  return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
12342  DAG.getConstant(ShiftRHS, DL, MVT::i64));
12343 }
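// Example of the pattern handled above, for i32 (RegWidth = 32):
//   or (shl A, 24), (srl B, 8)  -->  EXTR A, B, #8
// EXTR reads a 32-bit window starting 8 bits into the A:B register pair,
// which is exactly the two shifts OR'd together; hence the requirement that
// the shift amounts sum to the register width.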
12344 
12345 static SDValue tryCombineToBSL(SDNode *N,
12346  TargetLowering::DAGCombinerInfo &DCI) {
12347  EVT VT = N->getValueType(0);
12348  SelectionDAG &DAG = DCI.DAG;
12349  SDLoc DL(N);
12350 
12351  if (!VT.isVector())
12352  return SDValue();
12353 
12354  SDValue N0 = N->getOperand(0);
12355  if (N0.getOpcode() != ISD::AND)
12356  return SDValue();
12357 
12358  SDValue N1 = N->getOperand(1);
12359  if (N1.getOpcode() != ISD::AND)
12360  return SDValue();
12361 
12362  // We only have to look for constant vectors here since the general, variable
12363  // case can be handled in TableGen.
12364  unsigned Bits = VT.getScalarSizeInBits();
12365  uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
12366  for (int i = 1; i >= 0; --i)
12367  for (int j = 1; j >= 0; --j) {
12368  BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
12369  BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
12370  if (!BVN0 || !BVN1)
12371  continue;
12372 
12373  bool FoundMatch = true;
12374  for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
12375  ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
12376  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
12377  if (!CN0 || !CN1 ||
12378  CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
12379  FoundMatch = false;
12380  break;
12381  }
12382  }
12383 
12384  if (FoundMatch)
12385  return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
12386  N0->getOperand(1 - i), N1->getOperand(1 - j));
12387  }
12388 
12389  return SDValue();
12390 }
12391 
12392 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
12393  const AArch64Subtarget *Subtarget) {
12394  // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
12395  SelectionDAG &DAG = DCI.DAG;
12396  EVT VT = N->getValueType(0);
12397 
12398  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
12399  return SDValue();
12400 
12401  if (SDValue Res = tryCombineToEXTR(N, DCI))
12402  return Res;
12403 
12404  if (SDValue Res = tryCombineToBSL(N, DCI))
12405  return Res;
12406 
12407  return SDValue();
12408 }
12409 
12410 static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
12411  if (!MemVT.getVectorElementType().isSimple())
12412  return false;
12413 
12414  uint64_t MaskForTy = 0ull;
12415  switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
12416  case MVT::i8:
12417  MaskForTy = 0xffull;
12418  break;
12419  case MVT::i16:
12420  MaskForTy = 0xffffull;
12421  break;
12422  case MVT::i32:
12423  MaskForTy = 0xffffffffull;
12424  break;
12425  default:
12426  return false;
12427  break;
12428  }
12429 
12430  if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
12431  if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
12432  return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
12433 
12434  return false;
12435 }
12436 
12437 static SDValue performSVEAndCombine(SDNode *N,
12438  TargetLowering::DAGCombinerInfo &DCI) {
12439  if (DCI.isBeforeLegalizeOps())
12440  return SDValue();
12441 
12442  SelectionDAG &DAG = DCI.DAG;
12443  SDValue Src = N->getOperand(0);
12444  unsigned Opc = Src->getOpcode();
12445 
12446  // Zero/any extend of an unsigned unpack
12447  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
12448  SDValue UnpkOp = Src->getOperand(0);
12449  SDValue Dup = N->getOperand(1);
12450 
12451  if (Dup.getOpcode() != AArch64ISD::DUP)
12452  return SDValue();
12453 
12454  SDLoc DL(N);
12455  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
12456  uint64_t ExtVal = C->getZExtValue();
12457 
12458  // If the mask is fully covered by the unpack, we don't need to push
12459  // a new AND onto the operand
12460  EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
12461  if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
12462  (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
12463  (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
12464  return Src;
12465 
12466  // Truncate to prevent a DUP with an over wide constant
12467  APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
12468 
12469  // Otherwise, make sure we propagate the AND to the operand
12470  // of the unpack
12471  Dup = DAG.getNode(AArch64ISD::DUP, DL,
12472  UnpkOp->getValueType(0),
12473  DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
12474 
12475  SDValue And = DAG.getNode(ISD::AND, DL,
12476  UnpkOp->getValueType(0), UnpkOp, Dup);
12477 
12478  return DAG.getNode(Opc, DL, N->getValueType(0), And);
12479  }
12480 
12482  return SDValue();
12483 
12484  SDValue Mask = N->getOperand(1);
12485 
12486  if (!Src.hasOneUse())
12487  return SDValue();
12488 
12489  EVT MemVT;
12490 
12491  // SVE load instructions perform an implicit zero-extend, which makes them
12492  // perfect candidates for combining.
12493  switch (Opc) {
12497  MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
12498  break;
12514  MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
12515  break;
12516  default:
12517  return SDValue();
12518  }
12519 
12520  if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
12521  return Src;
12522 
12523  return SDValue();
12524 }
12525 
12526 static SDValue performANDCombine(SDNode *N,
12527  TargetLowering::DAGCombinerInfo &DCI) {
12528  SelectionDAG &DAG = DCI.DAG;
12529  SDValue LHS = N->getOperand(0);
12530  EVT VT = N->getValueType(0);
12531  if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
12532  return SDValue();
12533 
12534  if (VT.isScalableVector())
12535  return performSVEAndCombine(N, DCI);
12536 
12537  // The combining code below works only for NEON vectors. In particular, it
12538  // does not work for SVE when dealing with vectors wider than 128 bits.
12539  if (!(VT.is64BitVector() || VT.is128BitVector()))
12540  return SDValue();
12541 
12542  BuildVectorSDNode *BVN =
12543  dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
12544  if (!BVN)
12545  return SDValue();
12546 
12547  // AND does not accept an immediate, so check if we can use a BIC immediate
12548  // instruction instead. We do this here instead of using a (and x, (mvni imm))
12549  // pattern in isel, because some immediates may be lowered to the preferred
12550  // (and x, (movi imm)) form, even though an mvni representation also exists.
12551  APInt DefBits(VT.getSizeInBits(), 0);
12552  APInt UndefBits(VT.getSizeInBits(), 0);
12553  if (resolveBuildVector(BVN, DefBits, UndefBits)) {
12554  SDValue NewOp;
12555 
12556  DefBits = ~DefBits;
12557  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
12558  DefBits, &LHS)) ||
12559  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
12560  DefBits, &LHS)))
12561  return NewOp;
12562 
12563  UndefBits = ~UndefBits;
12564  if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
12565  UndefBits, &LHS)) ||
12566  (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
12567  UndefBits, &LHS)))
12568  return NewOp;
12569  }
12570 
12571  return SDValue();
12572 }
12573 
12574 static SDValue performSRLCombine(SDNode *N,
12575  TargetLowering::DAGCombinerInfo &DCI) {
12576  SelectionDAG &DAG = DCI.DAG;
12577  EVT VT = N->getValueType(0);
12578  if (VT != MVT::i32 && VT != MVT::i64)
12579  return SDValue();
12580 
12581  // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
12582  // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
12583  // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
12584  SDValue N0 = N->getOperand(0);
12585  if (N0.getOpcode() == ISD::BSWAP) {
12586  SDLoc DL(N);
12587  SDValue N1 = N->getOperand(1);
12588  SDValue N00 = N0.getOperand(0);
12589  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
12590  uint64_t ShiftAmt = C->getZExtValue();
12591  if (VT == MVT::i32 && ShiftAmt == 16 &&
12592  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
12593  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
12594  if (VT == MVT::i64 && ShiftAmt == 32 &&
12595  DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
12596  return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
12597  }
12598  }
12599  return SDValue();
12600 }
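// For example, when the high 16 bits of x are known zero:
//   srl (bswap i32 x), 16  ==  rotr (bswap i32 x), 16
// The rotate brings the low half of bswap(x), i.e. the byte-swapped high
// half of x, back into the top bits; since those bits are zero, it matches
// what the srl would have shifted in. The rotr(bswap) form is the one the
// REV16 selection patterns recognise.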
12601 
12602 // Attempt to form urhadd(OpA, OpB) from
12603 // truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
12604 // or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
12605 // The original form of the first expression is
12606 // truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
12607 // (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
12608 // Before this function is called the srl will have been lowered to
12609 // AArch64ISD::VLSHR.
12610 // This pass can also recognize signed variants of the patterns that use sign
12611 // extension instead of zero extension and form a srhadd(OpA, OpB) or a
12612 // shadd(OpA, OpB) from them.
12613 static SDValue
12615  SelectionDAG &DAG) {
12616  EVT VT = N->getValueType(0);
12617 
12618  // Since we are looking for a right shift by a constant value of 1 and we are
12619  // operating on types at least 16 bits in length (sign/zero extended OpA and
12620  // OpB, which are at least 8 bits), it follows that the truncate will always
12621  // discard the shifted-in bit and therefore the right shift will be logical
12622  // regardless of the signedness of OpA and OpB.
12623  SDValue Shift = N->getOperand(0);
12624  if (Shift.getOpcode() != AArch64ISD::VLSHR)
12625  return SDValue();
12626 
12627  // Is the right shift using an immediate value of 1?
12628  uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
12629  if (ShiftAmount != 1)
12630  return SDValue();
12631 
12632  SDValue ExtendOpA, ExtendOpB;
12633  SDValue ShiftOp0 = Shift.getOperand(0);
12634  unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
12635  if (ShiftOp0Opc == ISD::SUB) {
12636 
12637  SDValue Xor = ShiftOp0.getOperand(1);
12638  if (Xor.getOpcode() != ISD::XOR)
12639  return SDValue();
12640 
12641  // Is the XOR using a constant amount of all ones in the right hand side?
12642  uint64_t C;
12643  if (!isAllConstantBuildVector(Xor.getOperand(1), C))
12644  return SDValue();
12645 
12646  unsigned ElemSizeInBits = VT.getScalarSizeInBits();
12647  APInt CAsAPInt(ElemSizeInBits, C);
12648  if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
12649  return SDValue();
12650 
12651  ExtendOpA = Xor.getOperand(0);
12652  ExtendOpB = ShiftOp0.getOperand(0);
12653  } else if (ShiftOp0Opc == ISD::ADD) {
12654  ExtendOpA = ShiftOp0.getOperand(0);
12655  ExtendOpB = ShiftOp0.getOperand(1);
12656  } else
12657  return SDValue();
12658 
12659  unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
12660  unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
12661  if (!(ExtendOpAOpc == ExtendOpBOpc &&
12662  (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
12663  return SDValue();
12664 
12665  // Is the result of the right shift being truncated to the same value type as
12666  // the original operands, OpA and OpB?
12667  SDValue OpA = ExtendOpA.getOperand(0);
12668  SDValue OpB = ExtendOpB.getOperand(0);
12669  EVT OpAVT = OpA.getValueType();
12670  assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
12671  if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
12672  return SDValue();
12673 
12674  SDLoc DL(N);
12675  bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
12676  bool IsRHADD = ShiftOp0Opc == ISD::SUB;
12677  unsigned HADDOpc = IsSignExtend
12678  ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
12679  : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
12680  SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
12681 
12682  return ResultHADD;
12683 }
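// Scalar reference for the patterns recognised above (unsigned forms shown;
// the signed variants are analogous):
//   uint8_t uhadd (uint8_t a, uint8_t b) { return ((uint16_t)a + b) >> 1; }
//   uint8_t urhadd(uint8_t a, uint8_t b) { return ((uint16_t)a + b + 1) >> 1; }
// The rounding variant arrives here as (OpB - ~OpA) because earlier combines
// rewrite (OpA + OpB + 1) into that form, which is why the SUB/XOR shape is
// matched alongside the plain ADD shape.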
12684 
12685 static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
12686  switch (Opcode) {
12687  case ISD::FADD:
12688  return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
12689  case ISD::ADD:
12690  return VT == MVT::i64;
12691  default:
12692  return false;
12693  }
12694 }
12695 
12696 static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
12697  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12698  ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
12699 
12700  EVT VT = N->getValueType(0);
12701  const bool FullFP16 =
12702  static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
12703 
12704  // Rewrite for pairwise fadd pattern
12705  // (f32 (extract_vector_elt
12706  // (fadd (vXf32 Other)
12707  // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
12708  // ->
12709  // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
12710  // (extract_vector_elt (vXf32 Other) 1))
12711  if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
12712  hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
12713  SDLoc DL(N0);
12714  SDValue N00 = N0->getOperand(0);
12715  SDValue N01 = N0->getOperand(1);
12716 
12717  ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
12718  SDValue Other = N00;
12719 
12720  // And handle the commutative case.
12721  if (!Shuffle) {
12722  Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
12723  Other = N01;
12724  }
12725 
12726  if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
12727  Other == Shuffle->getOperand(0)) {
12728  return DAG.getNode(N0->getOpcode(), DL, VT,
12729  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
12730  DAG.getConstant(0, DL, MVT::i64)),
12731  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
12732  DAG.getConstant(1, DL, MVT::i64)));
12733  }
12734  }
12735 
12736  return SDValue();
12737 }
12738 
12739 static SDValue performConcatVectorsCombine(SDNode *N,
12740  TargetLowering::DAGCombinerInfo &DCI,
12741  SelectionDAG &DAG) {
12742  SDLoc dl(N);
12743  EVT VT = N->getValueType(0);
12744  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12745  unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
12746 
12747  // Optimize concat_vectors of truncated vectors, where the intermediate
12748  // type is illegal, to avoid said illegality, e.g.,
12749  // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
12750  // (v2i16 (truncate (v2i64)))))
12751  // ->
12752  // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
12753  // (v4i32 (bitcast (v2i64))),
12754  // <0, 2, 4, 6>)))
12755  // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
12756  // on both input and result type, so we might generate worse code.
12757  // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
12758  if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
12759  N1Opc == ISD::TRUNCATE) {
12760  SDValue N00 = N0->getOperand(0);
12761  SDValue N10 = N1->getOperand(0);
12762  EVT N00VT = N00.getValueType();
12763 
12764  if (N00VT == N10.getValueType() &&
12765  (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
12766  N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
12767  MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
12768  SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
12769  for (size_t i = 0; i < Mask.size(); ++i)
12770  Mask[i] = i * 2;
12771  return DAG.getNode(ISD::TRUNCATE, dl, VT,
12772  DAG.getVectorShuffle(
12773  MidVT, dl,
12774  DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
12775  DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
12776  }
12777  }
12778 
12779  // Wait 'til after everything is legalized to try this. That way we have
12780  // legal vector types and such.
12781  if (DCI.isBeforeLegalizeOps())
12782  return SDValue();
12783 
12784  // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
12785  // subvectors from the same original vectors. Combine these into a single
12786  // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
12787  // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
12788  // extract_subvector (v16i8 OpB,
12789  // <0>))),
12790  // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
12791  // extract_subvector (v16i8 OpB,
12792  // <8>)))))
12793  // ->
12794  // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
12795  if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
12796  (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
12797  N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
12798  SDValue N00 = N0->getOperand(0);
12799  SDValue N01 = N0->getOperand(1);
12800  SDValue N10 = N1->getOperand(0);
12801  SDValue N11 = N1->getOperand(1);
12802 
12803  EVT N00VT = N00.getValueType();
12804  EVT N10VT = N10.getValueType();
12805 
12806  if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12807  N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12808  N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
12809  N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
12810  SDValue N00Source = N00->getOperand(0);
12811  SDValue N01Source = N01->getOperand(0);
12812  SDValue N10Source = N10->getOperand(0);
12813  SDValue N11Source = N11->getOperand(0);
12814 
12815  if (N00Source == N10Source && N01Source == N11Source &&
12816  N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
12817  assert(N0.getValueType() == N1.getValueType());
12818 
12819  uint64_t N00Index = N00.getConstantOperandVal(1);
12820  uint64_t N01Index = N01.getConstantOperandVal(1);
12821  uint64_t N10Index = N10.getConstantOperandVal(1);
12822  uint64_t N11Index = N11.getConstantOperandVal(1);
12823 
12824  if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
12825  N10Index == N00VT.getVectorNumElements())
12826  return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
12827  }
12828  }
12829  }
12830 
12831  // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
12832  // splat. The indexed instructions are going to be expecting a DUPLANE64, so
12833  // canonicalise to that.
12834  if (N0 == N1 && VT.getVectorNumElements() == 2) {
12835  assert(VT.getScalarSizeInBits() == 64);
12836  return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
12837  DAG.getConstant(0, dl, MVT::i64));
12838  }
12839 
12840  // Canonicalise concat_vectors so that the right-hand vector has as few
12841  // bit-casts as possible before its real operation. The primary matching
12842  // destination for these operations will be the narrowing "2" instructions,
12843  // which depend on the operation being performed on this right-hand vector.
12844  // For example,
12845  // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
12846  // becomes
12847  // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
12848 
12849  if (N1Opc != ISD::BITCAST)
12850  return SDValue();
12851  SDValue RHS = N1->getOperand(0);
12852  MVT RHSTy = RHS.getValueType().getSimpleVT();
12853  // If the RHS is not a vector, this is not the pattern we're looking for.
12854  if (!RHSTy.isVector())
12855  return SDValue();
12856 
12857  LLVM_DEBUG(
12858  dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
12859 
12860  MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
12861  RHSTy.getVectorNumElements() * 2);
12862  return DAG.getNode(ISD::BITCAST, dl, VT,
12863  DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
12864  DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
12865  RHS));
12866 }
12867 
12868 static SDValue tryCombineFixedPointConvert(SDNode *N,
12869  TargetLowering::DAGCombinerInfo &DCI,
12870  SelectionDAG &DAG) {
12871  // Wait until after everything is legalized to try this. That way we have
12872  // legal vector types and such.
12873  if (DCI.isBeforeLegalizeOps())
12874  return SDValue();
12875  // Transform a scalar conversion of a value from a lane extract into a
12876  // lane extract of a vector conversion. E.g., from foo1 to foo2:
12877  // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
12878  // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
12879  //
12880  // The second form interacts better with instruction selection and the
12881  // register allocator to avoid cross-class register copies that aren't
12882  // coalescable due to a lane reference.
12883 
12884  // Check the operand and see if it originates from a lane extract.
12885  SDValue Op1 = N->getOperand(1);
12886  if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12887  // Yep, no additional predication needed. Perform the transform.
12888  SDValue IID = N->getOperand(0);
12889  SDValue Shift = N->getOperand(2);
12890  SDValue Vec = Op1.getOperand(0);
12891  SDValue Lane = Op1.getOperand(1);
12892  EVT ResTy = N->getValueType(0);
12893  EVT VecResTy;
12894  SDLoc DL(N);
12895 
12896  // The vector width should be 128 bits by the time we get here, even
12897  // if it started as 64 bits (the extract_vector handling will have
12898  // done so).
12899  assert(Vec.getValueSizeInBits() == 128 &&
12900  "unexpected vector size on extract_vector_elt!");
12901  if (Vec.getValueType() == MVT::v4i32)
12902  VecResTy = MVT::v4f32;
12903  else if (Vec.getValueType() == MVT::v2i64)
12904  VecResTy = MVT::v2f64;
12905  else
12906  llvm_unreachable("unexpected vector type!");
12907 
12908  SDValue Convert =
12909  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
12910  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
12911  }
12912  return SDValue();
12913 }
12914 
12915 // AArch64 high-vector "long" operations are formed by performing the non-high
12916 // version on an extract_subvector of each operand which gets the high half:
12917 //
12918 // (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
12919 //
12920 // However, there are cases which don't have an extract_high explicitly, but
12921 // have another operation that can be made compatible with one for free. For
12922 // example:
12923 //
12924 // (dupv64 scalar) --> (extract_high (dup128 scalar))
12925 //
12926 // This routine does the actual conversion of such DUPs, once outer routines
12927 // have determined that everything else is in order.
12928 // It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
12929 // similarly here.
12930 static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
12931  switch (N.getOpcode()) {
12932  case AArch64ISD::DUP:
12933  case AArch64ISD::DUPLANE8:
12934  case AArch64ISD::DUPLANE16:
12935  case AArch64ISD::DUPLANE32:
12936  case AArch64ISD::DUPLANE64:
12937  case AArch64ISD::MOVI:
12938  case AArch64ISD::MOVIshift:
12939  case AArch64ISD::MOVIedit:
12940  case AArch64ISD::MOVImsl:
12941  case AArch64ISD::MVNIshift:
12942  case AArch64ISD::MVNImsl:
12943  break;
12944  default:
12945  // FMOV could be supported, but isn't very useful, as it would only occur
12946  // if you passed a bitcast' floating point immediate to an eligible long
12947  // integer op (addl, smull, ...).
12948  return SDValue();
12949  }
12950 
12951  MVT NarrowTy = N.getSimpleValueType();
12952  if (!NarrowTy.is64BitVector())
12953  return SDValue();
12954 
12955  MVT ElementTy = NarrowTy.getVectorElementType();
12956  unsigned NumElems = NarrowTy.getVectorNumElements();
12957  MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
12958 
12959  SDLoc dl(N);
12960  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
12961  DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
12962  DAG.getConstant(NumElems, dl, MVT::i64));
12963 }
12964 
12965 static bool isEssentiallyExtractHighSubvector(SDValue N) {
12966  if (N.getOpcode() == ISD::BITCAST)
12967  N = N.getOperand(0);
12968  if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
12969  return false;
12970  return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
12971  N.getOperand(0).getValueType().getVectorNumElements() / 2;
12972 }
12973 
12974 /// Helper structure to keep track of ISD::SET_CC operands.
12975 struct GenericSetCCInfo {
12976  const SDValue *Opnd0;
12977  const SDValue *Opnd1;
12978  ISD::CondCode CC;
12979 };
12980 
12981 /// Helper structure to keep track of a SET_CC lowered into AArch64 code.
12982 struct AArch64SetCCInfo {
12983  const SDValue *Cmp;
12984  AArch64CC::CondCode CC;
12985 };
12986 
12987 /// Helper structure to keep track of SetCC information.
12988 union SetCCInfo {
12989  GenericSetCCInfo Generic;
12990  AArch64SetCCInfo AArch64;
12991 };
12992 
12993 /// Helper structure to be able to read SetCC information. If IsAArch64 is
12994 /// set to true, Info is an AArch64SetCCInfo, otherwise Info is a
12995 /// GenericSetCCInfo.
12996 struct SetCCInfoAndKind {
12997  SetCCInfo Info;
12998  bool IsAArch64;
12999 };
13000 
13001 /// Check whether or not \p Op is a SET_CC operation, either a generic or
13002 /// an
13003 /// AArch64 lowered one.
13004 /// \p SetCCInfo is filled accordingly.
13005 /// \post SetCCInfo is meaningful only when this function returns true.
13006 /// \return True when Op is a kind of SET_CC operation.
13007 static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
13008  // If this is a setcc, this is straightforward.
13009  if (Op.getOpcode() == ISD::SETCC) {
13010  SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
13011  SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
13012  SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13013  SetCCInfo.IsAArch64 = false;
13014  return true;
13015  }
13016  // Otherwise, check if this is a matching csel instruction.
13017  // In other words:
13018  // - csel 1, 0, cc
13019  // - csel 0, 1, !cc
13020  if (Op.getOpcode() != AArch64ISD::CSEL)
13021  return false;
13022  // Set the information about the operands.
13023  // TODO: we want the operands of the Cmp not the csel
13024  SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
13025  SetCCInfo.IsAArch64 = true;
13026  SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
13027  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
13028 
13029  // Check that the operands matches the constraints:
13030  // (1) Both operands must be constants.
13031  // (2) One must be 1 and the other must be 0.
13032  ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
13033  ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13034 
13035  // Check (1).
13036  if (!TValue || !FValue)
13037  return false;
13038 
13039  // Check (2).
13040  if (!TValue->isOne()) {
13041  // Update the comparison when we are interested in !cc.
13042  std::swap(TValue, FValue);
13043  SetCCInfo.Info.AArch64.CC =
13044  AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
13045  }
13046  return TValue->isOne() && FValue->isNullValue();
13047 }
13048 
13049 // Returns true if Op is setcc or zext of setcc.
13050 static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
13051  if (isSetCC(Op, Info))
13052  return true;
13053  return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
13054  isSetCC(Op->getOperand(0), Info));
13055 }
13056 
13057 // The folding we want to perform is:
13058 // (add x, [zext] (setcc cc ...) )
13059 // -->
13060 // (csel x, (add x, 1), !cc ...)
13061 //
13062 // The latter will get matched to a CSINC instruction.
13064  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
13065  SDValue LHS = Op->getOperand(0);
13066  SDValue RHS = Op->getOperand(1);
13067  SetCCInfoAndKind InfoAndKind;
13068 
13069  // If neither operand is a SET_CC, give up.
13070  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
13071  std::swap(LHS, RHS);
13072  if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
13073  return SDValue();
13074  }
13075 
13076  // FIXME: This could be generalized to work for FP comparisons.
13077  EVT CmpVT = InfoAndKind.IsAArch64
13078  ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
13079  : InfoAndKind.Info.Generic.Opnd0->getValueType();
13080  if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
13081  return SDValue();
13082 
13083  SDValue CCVal;
13084  SDValue Cmp;
13085  SDLoc dl(Op);
13086  if (InfoAndKind.IsAArch64) {
13087  CCVal = DAG.getConstant(
13088  AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
13089  MVT::i32);
13090  Cmp = *InfoAndKind.Info.AArch64.Cmp;
13091  } else
13092  Cmp = getAArch64Cmp(
13093  *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
13094  ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
13095  dl);
13096 
13097  EVT VT = Op->getValueType(0);
13098  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
13099  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
13100 }
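// For example, using the notation of the comment above:
//   add x, (setcc a, b, eq)  -->  csel x, (add x, 1), ne, (cmp a, b)
// which instruction selection then turns into "cmp a, b; csinc dst, x, x, ne",
// i.e. x plus one exactly when the original condition held.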
13101 
13102 // ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
13103 static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
13104  EVT VT = N->getValueType(0);
13105  // Only scalar integer and vector types.
13106  if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
13107  return SDValue();
13108 
13109  SDValue LHS = N->getOperand(0);
13110  SDValue RHS = N->getOperand(1);
13111  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13112  RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
13113  return SDValue();
13114 
13115  auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
13116  auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
13117  if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
13118  return SDValue();
13119 
13120  SDValue Op1 = LHS->getOperand(0);
13121  SDValue Op2 = RHS->getOperand(0);
13122  EVT OpVT1 = Op1.getValueType();
13123  EVT OpVT2 = Op2.getValueType();
13124  if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
13125  Op2.getOpcode() != AArch64ISD::UADDV ||
13126  OpVT1.getVectorElementType() != VT)
13127  return SDValue();
13128 
13129  SDValue Val1 = Op1.getOperand(0);
13130  SDValue Val2 = Op2.getOperand(0);
13131  EVT ValVT = Val1->getValueType(0);
13132  SDLoc DL(N);
13133  SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
13134  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
13135  DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
13136  DAG.getConstant(0, DL, MVT::i64));
13137 }
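// In other words, two full reductions followed by a scalar add become one
// vector add followed by a single reduction:
//   add (extract_elt (uaddv a), 0), (extract_elt (uaddv b), 0)
//     --> extract_elt (uaddv (add a, b)), 0
// valid because the horizontal sum distributes over the lane-wise addition.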
13138 
13139 // The basic add/sub long vector instructions have variants with "2" on the end
13140 // which act on the high-half of their inputs. They are normally matched by
13141 // patterns like:
13142 //
13143 // (add (zeroext (extract_high LHS)),
13144 // (zeroext (extract_high RHS)))
13145 // -> uaddl2 vD, vN, vM
13146 //
13147 // However, if one of the extracts is something like a duplicate, this
13148 // instruction can still be used profitably. This function puts the DAG into a
13149 // more appropriate form for those patterns to trigger.
13150 static SDValue performAddSubLongCombine(SDNode *N,
13151  TargetLowering::DAGCombinerInfo &DCI,
13152  SelectionDAG &DAG) {
13153  if (DCI.isBeforeLegalizeOps())
13154  return SDValue();
13155 
13156  MVT VT = N->getSimpleValueType(0);
13157  if (!VT.is128BitVector()) {
13158  if (N->getOpcode() == ISD::ADD)
13159  return performSetccAddFolding(N, DAG);
13160  return SDValue();
13161  }
13162 
13163  // Make sure both branches are extended in the same way.
13164  SDValue LHS = N->getOperand(0);
13165  SDValue RHS = N->getOperand(1);
13166  if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
13167  LHS.getOpcode() != ISD::SIGN_EXTEND) ||
13168  LHS.getOpcode() != RHS.getOpcode())
13169  return SDValue();
13170 
13171  unsigned ExtType = LHS.getOpcode();
13172 
13173  // It's not worth doing if at least one of the inputs isn't already an
13174  // extract, but we don't know which it'll be so we have to try both.
13175  if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
13176  RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
13177  if (!RHS.getNode())
13178  return SDValue();
13179 
13180  RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
13181  } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
13182  LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
13183  if (!LHS.getNode())
13184  return SDValue();
13185 
13186  LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
13187  }
13188 
13189  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
13190 }
13191 
13192 static SDValue performAddSubCombine(SDNode *N,
13193  TargetLowering::DAGCombinerInfo &DCI,
13194  SelectionDAG &DAG) {
13195  // Try to change sum of two reductions.
13196  if (SDValue Val = performUADDVCombine(N, DAG))
13197  return Val;
13198 
13199  return performAddSubLongCombine(N, DCI, DAG);
13200 }
13201 
13202 // Massage DAGs which we can use the high-half "long" operations on into
13203 // something isel will recognize better. E.g.
13204 //
13205 // (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
13206 // (aarch64_neon_umull (extract_high (v2i64 vec)))
13207 // (extract_high (v2i64 (dup128 scalar)))))
13208 //
13209 static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
13211  SelectionDAG &DAG) {
13212  if (DCI.isBeforeLegalizeOps())
13213  return SDValue();
13214 
13215  SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
13216  SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
13217  assert(LHS.getValueType().is64BitVector() &&
13218  RHS.getValueType().is64BitVector() &&
13219  "unexpected shape for long operation");
13220 
13221  // Either node could be a DUP, but it's not worth doing both of them (you'd
13222  // just as well use the non-high version) so look for a corresponding extract
13223  // operation on the other "wing".
13224  if (isEssentiallyExtractHighSubvector(LHS)) {
13225  RHS = tryExtendDUPToExtractHigh(RHS, DAG);
13226  if (!RHS.getNode())
13227  return SDValue();
13228  } else if (isEssentiallyExtractHighSubvector(RHS)) {
13229  LHS = tryExtendDUPToExtractHigh(LHS, DAG);
13230  if (!LHS.getNode())
13231  return SDValue();
13232  }
13233 
13234  if (IID == Intrinsic::not_intrinsic)
13235  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
13236 
13237  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
13238  N->getOperand(0), LHS, RHS);
13239 }
13240 
13241 static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
13242  MVT ElemTy = N->getSimpleValueType(0).getScalarType();
13243  unsigned ElemBits = ElemTy.getSizeInBits();
13244 
13245  int64_t ShiftAmount;
13246  if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
13247  APInt SplatValue, SplatUndef;
13248  unsigned SplatBitSize;
13249  bool HasAnyUndefs;
13250  if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
13251  HasAnyUndefs, ElemBits) ||
13252  SplatBitSize != ElemBits)
13253  return SDValue();
13254 
13255  ShiftAmount = SplatValue.getSExtValue();
13256  } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
13257  ShiftAmount = CVN->getSExtValue();
13258  } else
13259  return SDValue();
13260 
13261  unsigned Opcode;
13262  bool IsRightShift;
13263  switch (IID) {
13264  default:
13265  llvm_unreachable("Unknown shift intrinsic");
13266  case Intrinsic::aarch64_neon_sqshl:
13267  Opcode = AArch64ISD::SQSHL_I;
13268  IsRightShift = false;
13269  break;
13270  case Intrinsic::aarch64_neon_uqshl:
13271  Opcode = AArch64ISD::UQSHL_I;
13272  IsRightShift = false;
13273  break;
13274  case Intrinsic::aarch64_neon_srshl:
13275  Opcode = AArch64ISD::SRSHR_I;
13276  IsRightShift = true;
13277  break;
13278  case Intrinsic::aarch64_neon_urshl:
13279  Opcode = AArch64ISD::URSHR_I;
13280  IsRightShift = true;
13281  break;
13282  case Intrinsic::aarch64_neon_sqshlu:
13283  Opcode = AArch64ISD::SQSHLU_I;
13284  IsRightShift = false;
13285  break;
13286  case Intrinsic::aarch64_neon_sshl:
13287  case Intrinsic::aarch64_neon_ushl:
13288  // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
13289  // left shift for positive shift amounts. Below, we only replace the current
13290  // node with VSHL, if this condition is met.
13291  Opcode = AArch64ISD::VSHL;
13292  IsRightShift = false;
13293  break;
13294  }
13295 
13296  if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
13297  SDLoc dl(N);
13298  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
13299  DAG.getConstant(-ShiftAmount, dl, MVT::i32));
13300  } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
13301  SDLoc dl(N);
13302  return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
13303  DAG.getConstant(ShiftAmount, dl, MVT::i32));
13304  }
13305 
13306  return SDValue();
13307 }
13308 
13309 // The CRC32[BH] instructions ignore the high bits of their data operand. Since
13310 // the intrinsics must be legal and take an i32, this means there's almost
13311 // certainly going to be a zext in the DAG which we can eliminate.
13312 static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
13313  SDValue AndN = N->getOperand(2);
13314  if (AndN.getOpcode() != ISD::AND)
13315  return SDValue();
13316 
13317  ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
13318  if (!CMask || CMask->getZExtValue() != Mask)
13319  return SDValue();
13320 
13321  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
13322  N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
13323 }
13324 
13325 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
13326  SelectionDAG &DAG) {
13327  SDLoc dl(N);
13328  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
13329  DAG.getNode(Opc, dl,
13330  N->getOperand(1).getSimpleValueType(),
13331  N->getOperand(1)),
13332  DAG.getConstant(0, dl, MVT::i64));
13333 }
13334 
13335 static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
13336  SDLoc DL(N);
13337  SDValue Op1 = N->getOperand(1);
13338  SDValue Op2 = N->getOperand(2);
13339  EVT ScalarTy = Op1.getValueType();
13340 
13341  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16)) {
13342  Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
13343  Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
13344  }
13345 
13346  return DAG.getNode(AArch64ISD::INDEX_VECTOR, DL, N->getValueType(0),
13347  Op1, Op2);
13348 }
13349 
13350 static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
13351  SDLoc dl(N);
13352  SDValue Scalar = N->getOperand(3);
13353  EVT ScalarTy = Scalar.getValueType();
13354 
13355  if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
13356  Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
13357 
13358  SDValue Passthru = N->getOperand(1);
13359  SDValue Pred = N->getOperand(2);
13360  return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
13361  Pred, Scalar, Passthru);
13362 }
13363 
13364 static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
13365  SDLoc dl(N);
13366  LLVMContext &Ctx = *DAG.getContext();
13367  EVT VT = N->getValueType(0);
13368 
13369  assert(VT.isScalableVector() && "Expected a scalable vector.");
13370 
13371  // Current lowering only supports the SVE-ACLE types.
13372  if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
13373  return SDValue();
13374 
13375  unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
13376  unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
13377  EVT ByteVT =
13378  EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
13379 
13380  // Convert everything to the domain of EXT (i.e bytes).
13381  SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
13382  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
13383  SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
13384  DAG.getConstant(ElemSize, dl, MVT::i32));
13385 
13386  SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
13387  return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
13388 }
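// Illustrative example of the conversion above: for an nxv4i32 EXT with an
// element index of 1, both vector operands are bitcast to nxv16i8 and the
// index is multiplied by 4 (the element size in bytes), so the node becomes
// a byte-wise EXT with an offset of 4.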
13389 
13390 static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
13391  TargetLowering::DAGCombinerInfo &DCI,
13392  SelectionDAG &DAG) {
13393  if (DCI.isBeforeLegalize())
13394  return SDValue();
13395 
13396  SDValue Comparator = N->getOperand(3);
13397  if (Comparator.getOpcode() == AArch64ISD::DUP ||
13398  Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
13399  unsigned IID = getIntrinsicID(N);
13400  EVT VT = N->getValueType(0);
13401  EVT CmpVT = N->getOperand(2).getValueType();
13402  SDValue Pred = N->getOperand(1);
13403  SDValue Imm;
13404  SDLoc DL(N);
13405 
13406  switch (IID) {
13407  default:
13408  llvm_unreachable("Called with wrong intrinsic!");
13409  break;
13410 
13411  // Signed comparisons
13412  case Intrinsic::aarch64_sve_cmpeq_wide:
13413  case Intrinsic::aarch64_sve_cmpne_wide:
13414  case Intrinsic::aarch64_sve_cmpge_wide:
13415  case Intrinsic::aarch64_sve_cmpgt_wide:
13416  case Intrinsic::aarch64_sve_cmplt_wide:
13417  case Intrinsic::aarch64_sve_cmple_wide: {
13418  if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
13419  int64_t ImmVal = CN->getSExtValue();
13420  if (ImmVal >= -16 && ImmVal <= 15)
13421  Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
13422  else
13423  return SDValue();
13424  }
13425  break;
13426  }
13427  // Unsigned comparisons
13428  case Intrinsic::aarch64_sve_cmphs_wide:
13429  case Intrinsic::aarch64_sve_cmphi_wide:
13430  case Intrinsic::aarch64_sve_cmplo_wide:
13431  case Intrinsic::aarch64_sve_cmpls_wide: {
13432  if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
13433  uint64_t ImmVal = CN->getZExtValue();
13434  if (ImmVal <= 127)
13435  Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
13436  else
13437  return SDValue();
13438  }
13439  break;
13440  }
13441  }
13442 
13443  if (!Imm)
13444  return SDValue();
13445 
13446  SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
13447  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
13448  N->getOperand(2), Splat, DAG.getCondCode(CC));
13449  }
13450 
13451  return SDValue();
13452 }
13453 
13454 static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
13455  AArch64CC::CondCode Cond) {
13456  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13457 
13458  SDLoc DL(Op);
13459  assert(Op.getValueType().isScalableVector() &&
13460  TLI.isTypeLegal(Op.getValueType()) &&
13461  "Expected legal scalable vector type!");
13462 
13463  // Ensure target specific opcodes are using legal type.
13464  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
13465  SDValue TVal = DAG.getConstant(1, DL, OutVT);
13466  SDValue FVal = DAG.getConstant(0, DL, OutVT);
13467 
13468  // Set condition code (CC) flags.
13469  SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
13470 
13471  // Convert CC to integer based on requested condition.
13472  // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
13473  SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
13474  SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
13475  return DAG.getZExtOrTrunc(Res, DL, VT);
13476 }
13477 
13478 static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
13479  SelectionDAG &DAG) {
13480  SDLoc DL(N);
13481 
13482  SDValue Pred = N->getOperand(1);
13483  SDValue VecToReduce = N->getOperand(2);
13484 
13485  // NOTE: The integer reduction's result type is not always linked to the
13486  // operand's element type so we construct it from the intrinsic's result type.
13487  EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
13488  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
13489 
13490  // SVE reductions set the whole vector register with the first element
13491  // containing the reduction result, which we'll now extract.
13492  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13493  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
13494  Zero);
13495 }
13496 
13497 static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
13498  SelectionDAG &DAG) {
13499  SDLoc DL(N);
13500 
13501  SDValue Pred = N->getOperand(1);
13502  SDValue VecToReduce = N->getOperand(2);
13503 
13504  EVT ReduceVT = VecToReduce.getValueType();
13505  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
13506 
13507  // SVE reductions set the whole vector register with the first element
13508  // containing the reduction result, which we'll now extract.
13509  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13510  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
13511  Zero);
13512 }
13513 
13514 static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
13515  SelectionDAG &DAG) {
13516  SDLoc DL(N);
13517 
13518  SDValue Pred = N->getOperand(1);
13519  SDValue InitVal = N->getOperand(2);
13520  SDValue VecToReduce = N->getOperand(3);
13521  EVT ReduceVT = VecToReduce.getValueType();
13522 
13523  // Ordered reductions use the first lane of the result vector as the
13524  // reduction's initial value.
13525  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
13526  InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
13527  DAG.getUNDEF(ReduceVT), InitVal, Zero);
13528 
13529  SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
13530 
13531  // SVE reductions set the whole vector register with the first element
13532  // containing the reduction result, which we'll now extract.
13533  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
13534  Zero);
13535 }
13536 
13537 // If a merged operation has no inactive lanes we can relax it to a predicated
13538 // or unpredicated operation, which potentially allows better isel (perhaps
13539 // using immediate forms) or relaxing register reuse requirements.
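// For example (sketch): a merging SVE min intrinsic whose governing predicate
// is ptrue(all) can be emitted directly as the corresponding predicated node,
// because there are no inactive lanes whose original contents must survive.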
13540 static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
13541  SelectionDAG &DAG) {
13542  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
13543  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
13544  SDValue Pg = N->getOperand(1);
13545 
13546  // ISD way to specify an all active predicate.
13547  if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
13548  (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
13549  return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
13550  N->getOperand(2), N->getOperand(3));
13551 
13552  // FUTURE: SplatVector(true)
13553  return SDValue();
13554 }
13555 
13556 static SDValue performIntrinsicCombine(SDNode *N,
13557  TargetLowering::DAGCombinerInfo &DCI,
13558  const AArch64Subtarget *Subtarget) {
13559  SelectionDAG &DAG = DCI.DAG;
13560  unsigned IID = getIntrinsicID(N);
13561  switch (IID) {
13562  default:
13563  break;
13564  case Intrinsic::aarch64_neon_vcvtfxs2fp:
13565  case Intrinsic::aarch64_neon_vcvtfxu2fp:
13566  return tryCombineFixedPointConvert(N, DCI, DAG);
13567  case Intrinsic::aarch64_neon_saddv:
13568  return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
13569  case Intrinsic::aarch64_neon_uaddv:
13570  return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
13571  case Intrinsic::aarch64_neon_sminv:
13572  return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
13573  case Intrinsic::aarch64_neon_uminv:
13574  return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
13575  case Intrinsic::aarch64_neon_smaxv:
13576  return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
13577  case Intrinsic::aarch64_neon_umaxv:
13578  return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
13579  case Intrinsic::aarch64_neon_fmax:
13580  return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
13581  N->getOperand(1), N->getOperand(2));
13582  case Intrinsic::aarch64_neon_fmin:
13583  return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
13584  N->getOperand(1), N->getOperand(2));
13585  case Intrinsic::aarch64_neon_fmaxnm:
13586  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
13587  N->getOperand(1), N->getOperand(2));
13588  case Intrinsic::aarch64_neon_fminnm:
13589  return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
13590  N->getOperand(1), N->getOperand(2));
13591  case Intrinsic::aarch64_neon_smull:
13592  case Intrinsic::aarch64_neon_umull:
13593  case Intrinsic::aarch64_neon_pmull:
13594  case Intrinsic::aarch64_neon_sqdmull:
13595  return tryCombineLongOpWithDup(IID, N, DCI, DAG);
13596  case Intrinsic::aarch64_neon_sqshl:
13597  case Intrinsic::aarch64_neon_uqshl:
13598  case Intrinsic::aarch64_neon_sqshlu:
13599  case Intrinsic::aarch64_neon_srshl:
13600  case Intrinsic::aarch64_neon_urshl:
13601  case Intrinsic::aarch64_neon_sshl:
13602  case Intrinsic::aarch64_neon_ushl:
13603  return tryCombineShiftImm(IID, N, DAG);
13604  case Intrinsic::aarch64_crc32b:
13605  case Intrinsic::aarch64_crc32cb:
13606  return tryCombineCRC32(0xff, N, DAG);
13607  case Intrinsic::aarch64_crc32h:
13608  case Intrinsic::aarch64_crc32ch:
13609  return tryCombineCRC32(0xffff, N, DAG);
13610  case Intrinsic::aarch64_sve_saddv:
13611  // There is no i64 version of SADDV because the sign is irrelevant.
13612  if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
13613  return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
13614  else
13615  return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
13616  case Intrinsic::aarch64_sve_uaddv:
13617  return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
13618  case Intrinsic::aarch64_sve_smaxv:
13619  return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
13620  case Intrinsic::aarch64_sve_umaxv:
13621  return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
13622  case Intrinsic::aarch64_sve_sminv:
13623  return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
13624  case Intrinsic::aarch64_sve_uminv:
13625  return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
13626  case Intrinsic::aarch64_sve_orv:
13627  return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
13628  case Intrinsic::aarch64_sve_eorv:
13629  return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
13630  case Intrinsic::aarch64_sve_andv:
13631  return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
13632  case Intrinsic::aarch64_sve_index:
13633  return LowerSVEIntrinsicIndex(N, DAG);
13634  case Intrinsic::aarch64_sve_dup:
13635  return LowerSVEIntrinsicDUP(N, DAG);
13636  case Intrinsic::aarch64_sve_dup_x:
13637  return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
13638  N->getOperand(1));
13639  case Intrinsic::aarch64_sve_ext:
13640  return LowerSVEIntrinsicEXT(N, DAG);
13641  case Intrinsic::aarch64_sve_smin:
13642  return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
13643  case Intrinsic::aarch64_sve_umin:
13644  return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
13645  case Intrinsic::aarch64_sve_smax:
13646  return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
13647  case Intrinsic::aarch64_sve_umax:
13648  return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
13649  case Intrinsic::aarch64_sve_lsl:
13650  return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
13651  case Intrinsic::aarch64_sve_lsr:
13652  return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
13653  case Intrinsic::aarch64_sve_asr:
13654  return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
13655  case Intrinsic::aarch64_sve_cmphs:
13656  if (!N->getOperand(2).getValueType().isFloatingPoint())
13657  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13658  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13659  N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
13660  break;
13661  case Intrinsic::aarch64_sve_cmphi:
13662  if (!N->getOperand(2).getValueType().isFloatingPoint())
13663  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13664  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13665  N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
13666  break;
13667  case Intrinsic::aarch64_sve_cmpge:
13668  if (!N->getOperand(2).getValueType().isFloatingPoint())
13669  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13670  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13671  N->getOperand(3), DAG.getCondCode(ISD::SETGE));
13672  break;
13673  case Intrinsic::aarch64_sve_cmpgt:
13674  if (!N->getOperand(2).getValueType().isFloatingPoint())
13675  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13676  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13677  N->getOperand(3), DAG.getCondCode(ISD::SETGT));
13678  break;
13679  case Intrinsic::aarch64_sve_cmpeq:
13680  if (!N->getOperand(2).getValueType().isFloatingPoint())
13681  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13682  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13683  N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
13684  break;
13685  case Intrinsic::aarch64_sve_cmpne:
13686  if (!N->getOperand(2).getValueType().isFloatingPoint())
13687  return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
13688  N->getValueType(0), N->getOperand(1), N->getOperand(2),
13689  N->getOperand(3), DAG.getCondCode(ISD::SETNE));
13690  break;
13691  case Intrinsic::aarch64_sve_fadda:
13692  return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
13693  case Intrinsic::aarch64_sve_faddv:
13694  return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
13695  case Intrinsic::aarch64_sve_fmaxnmv:
13696  return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
13697  case Intrinsic::aarch64_sve_fmaxv:
13698  return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
13699  case Intrinsic::aarch64_sve_fminnmv:
13700  return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
13701  case Intrinsic::aarch64_sve_fminv:
13702  return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
13703  case Intrinsic::aarch64_sve_sel:
13704  return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
13705  N->getOperand(1), N->getOperand(2), N->getOperand(3));
13706  case Intrinsic::aarch64_sve_cmpeq_wide:
13707  return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
13708  case Intrinsic::aarch64_sve_cmpne_wide:
13709  return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
13710  case Intrinsic::aarch64_sve_cmpge_wide:
13711  return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
13712  case Intrinsic::aarch64_sve_cmpgt_wide:
13713  return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
13714  case Intrinsic::aarch64_sve_cmplt_wide:
13715  return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
13716  case Intrinsic::aarch64_sve_cmple_wide:
13717  return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
13718  case Intrinsic::aarch64_sve_cmphs_wide:
13719  return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
13720  case Intrinsic::aarch64_sve_cmphi_wide:
13721  return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
13722  case Intrinsic::aarch64_sve_cmplo_wide:
13723  return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
13724  case Intrinsic::aarch64_sve_cmpls_wide:
13725  return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
13726  case Intrinsic::aarch64_sve_ptest_any:
13727  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
13728  AArch64CC::ANY_ACTIVE);
13729  case Intrinsic::aarch64_sve_ptest_first:
13730  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
13731  AArch64CC::FIRST_ACTIVE);
13732  case Intrinsic::aarch64_sve_ptest_last:
13733  return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
13734  AArch64CC::LAST_ACTIVE);
13735  }
13736  return SDValue();
13737 }
13738 
13741  SelectionDAG &DAG) {
13742  // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
13743  // we can convert that DUP into another extract_high (of a bigger DUP), which
13744  // helps the backend to decide that an sabdl2 would be useful, saving a real
13745  // extract_high operation.
13746  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
13747  (N->getOperand(0).getOpcode() == AArch64ISD::UABD ||
13748  N->getOperand(0).getOpcode() == AArch64ISD::SABD)) {
13749  SDNode *ABDNode = N->getOperand(0).getNode();
13750  SDValue NewABD =
13751  tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
13752  if (!NewABD.getNode())
13753  return SDValue();
13754 
13755  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
13756  }
13757 
13758  // This is effectively a custom type legalization for AArch64.
13759  //
13760  // Type legalization will split an extend of a small, legal, type to a larger
13761  // illegal type by first splitting the destination type, often creating
13762  // illegal source types, which then get legalized in isel-confusing ways,
13763  // leading to really terrible codegen. E.g.,
13764  // %result = v8i32 sext v8i8 %value
13765  // becomes
13766  // %losrc = extract_subreg %value, ...
13767  // %hisrc = extract_subreg %value, ...
13768  // %lo = v4i32 sext v4i8 %losrc
13769  // %hi = v4i32 sext v4i8 %hisrc
13770  // Things go rapidly downhill from there.
13771  //
13772  // For AArch64, the [sz]ext vector instructions can only go up one element
13773  // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
13774  // takes two instructions.
13775  //
13776  // This implies that the most efficient way to do the extend from v8i8
13777  // to two v4i32 values is to first extend the v8i8 to v8i16, then let
13778  // the normal splitting happen for the v8i16->v8i32.
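  // For example (illustrative): for %result = v8i32 zext v8i8 %value we first
  // form %wide = v8i16 zext v8i8 %value, and the type legalizer then splits
  // the v8i16 -> v8i32 extend into two halves:
  //   %lo = v4i32 zext (v4i16 extract_subvector %wide, 0)
  //   %hi = v4i32 zext (v4i16 extract_subvector %wide, 4)
  // both of which have legal source types.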
13779 
13780  // This is pre-legalization to catch some cases where the default
13781  // type legalization will create ill-tempered code.
13782  if (!DCI.isBeforeLegalizeOps())
13783  return SDValue();
13784 
13785  // We're only interested in cleaning things up for non-legal vector types
13786  // here. If both the source and destination are legal, things will just
13787  // work naturally without any fiddling.
13788  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13789  EVT ResVT = N->getValueType(0);
13790  if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
13791  return SDValue();
13792  // If the vector type isn't a simple VT, it's beyond the scope of what
13793  // we're worried about here. Let legalization do its thing and hope for
13794  // the best.
13795  SDValue Src = N->getOperand(0);
13796  EVT SrcVT = Src->getValueType(0);
13797  if (!ResVT.isSimple() || !SrcVT.isSimple())
13798  return SDValue();
13799 
13800  // If the source VT is a 64-bit fixed or scalable vector, we can play games
13801  // and get the better results we want.
13802  if (SrcVT.getSizeInBits().getKnownMinSize() != 64)
13803  return SDValue();
13804 
13805  unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
13806  ElementCount SrcEC = SrcVT.getVectorElementCount();
13807  SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), SrcEC);
13808  SDLoc DL(N);
13809  Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
13810 
13811  // Now split the rest of the operation into two halves, each with a 64
13812  // bit source.
13813  EVT LoVT, HiVT;
13814  SDValue Lo, Hi;
13815  LoVT = HiVT = ResVT.getHalfNumVectorElementsVT(*DAG.getContext());
13816 
13817  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
13818  LoVT.getVectorElementCount());
13819  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
13820  DAG.getConstant(0, DL, MVT::i64));
13821  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
13822  DAG.getConstant(InNVT.getVectorMinNumElements(), DL, MVT::i64));
13823  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
13824  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
13825 
13826  // Now combine the parts back together so we still have a single result
13827  // like the combiner expects.
13828  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
13829 }
13830 
13831 static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
13832  SDValue SplatVal, unsigned NumVecElts) {
13833  assert(!St.isTruncatingStore() && "cannot split truncating vector store");
13834  unsigned OrigAlignment = St.getAlignment();
13835  unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
13836 
13837  // Create scalar stores. This is at least as good as the code sequence for a
13838  // split unaligned store which is a dup.s, ext.b, and two stores.
13839  // Most of the time the three stores should be replaced by store pair
13840  // instructions (stp).
13841  SDLoc DL(&St);
13842  SDValue BasePtr = St.getBasePtr();
13843  uint64_t BaseOffset = 0;
13844 
13845  const MachinePointerInfo &PtrInfo = St.getPointerInfo();
13846  SDValue NewST1 =
13847  DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
13848  OrigAlignment, St.getMemOperand()->getFlags());
13849 
13850  // As this is in ISel, we will not merge this add, which may degrade results.
13851  if (BasePtr->getOpcode() == ISD::ADD &&
13852  isa<ConstantSDNode>(BasePtr->getOperand(1))) {
13853  BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
13854  BasePtr = BasePtr->getOperand(0);
13855  }
13856 
13857  unsigned Offset = EltOffset;
13858  while (--NumVecElts) {
13859  unsigned Alignment = MinAlign(OrigAlignment, Offset);
13860  SDValue OffsetPtr =
13861  DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
13862  DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
13863  NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
13864  PtrInfo.getWithOffset(Offset), Alignment,
13865  St.getMemOperand()->getFlags());
13866  Offset += EltOffset;
13867  }
13868  return NewST1;
13869 }
13870 
13871 // Returns an SVE type that ContentTy can be trivially sign or zero extended
13872 // into.
13873 static MVT getSVEContainerType(EVT ContentTy) {
13874  assert(ContentTy.isSimple() && "No SVE containers for extended types");
13875 
13876  switch (ContentTy.getSimpleVT().SimpleTy) {
13877  default:
13878  llvm_unreachable("No known SVE container for this MVT type");
13879  case MVT::nxv2i8:
13880  case MVT::nxv2i16:
13881  case MVT::nxv2i32:
13882  case MVT::nxv2i64:
13883  case MVT::nxv2f32:
13884  case MVT::nxv2f64:
13885  return MVT::nxv2i64;
13886  case MVT::nxv4i8:
13887  case MVT::nxv4i16:
13888  case MVT::nxv4i32:
13889  case MVT::nxv4f32:
13890  return MVT::nxv4i32;
13891  case MVT::nxv8i8:
13892  case MVT::nxv8i16:
13893  case MVT::nxv8f16:
13894  case MVT::nxv8bf16:
13895  return MVT::nxv8i16;
13896  case MVT::nxv16i8:
13897  return MVT::nxv16i8;
13898  }
13899 }
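// For example, an unpacked nxv2i16 or nxv2f32 value lives in an nxv2i64
// container, nxv4i8 in nxv4i32, and fully packed types such as nxv16i8 map
// to themselves.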
13900 
13901 static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
13902  SDLoc DL(N);
13903  EVT VT = N->getValueType(0);
13904 
13906  return SDValue();
13907 
13908  EVT ContainerVT = VT;
13909  if (ContainerVT.isInteger())
13910  ContainerVT = getSVEContainerType(ContainerVT);
13911 
13912  SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
13913  SDValue Ops[] = { N->getOperand(0), // Chain
13914  N->getOperand(2), // Pg
13915  N->getOperand(3), // Base
13916  DAG.getValueType(VT) };
13917 
13918  SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
13919  SDValue LoadChain = SDValue(Load.getNode(), 1);
13920 
13921  if (ContainerVT.isInteger() && (VT != ContainerVT))
13922  Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
13923 
13924  return DAG.getMergeValues({ Load, LoadChain }, DL);
13925 }
13926 
13927 static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
13928  SDLoc DL(N);
13929  EVT VT = N->getValueType(0);
13930  EVT PtrTy = N->getOperand(3).getValueType();
13931 
13932  if (VT == MVT::nxv8bf16 &&
13933  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
13934  return SDValue();
13935 
13936  EVT LoadVT = VT;
13937  if (VT.isFloatingPoint())
13938  LoadVT = VT.changeTypeToInteger();
13939 
13940  auto *MINode = cast<MemIntrinsicSDNode>(N);
13941  SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
13942  SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
13943  MINode->getOperand(3), DAG.getUNDEF(PtrTy),
13944  MINode->getOperand(2), PassThru,
13945  MINode->getMemoryVT(), MINode->getMemOperand(),
13946  ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
13947 
13948  if (VT.isFloatingPoint()) {
13949  SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
13950  return DAG.getMergeValues(Ops, DL);
13951  }
13952 
13953  return L;
13954 }
13955 
13956 template <unsigned Opcode>
13957 static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
13958  static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
13959  Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
13960  "Unsupported opcode.");
13961  SDLoc DL(N);
13962  EVT VT = N->getValueType(0);
13963  if (VT == MVT::nxv8bf16 &&
13964  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
13965  return SDValue();
13966 
13967  EVT LoadVT = VT;
13968  if (VT.isFloatingPoint())
13969  LoadVT = VT.changeTypeToInteger();
13970 
13971  SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
13972  SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
13973  SDValue LoadChain = SDValue(Load.getNode(), 1);
13974 
13975  if (VT.isFloatingPoint())
13976  Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
13977 
13978  return DAG.getMergeValues({Load, LoadChain}, DL);
13979 }
13980 
13981 static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
13982  SDLoc DL(N);
13983  SDValue Data = N->getOperand(2);
13984  EVT DataVT = Data.getValueType();
13985  EVT HwSrcVt = getSVEContainerType(DataVT);
13986  SDValue InputVT = DAG.getValueType(DataVT);
13987 
13988  if (DataVT == MVT::nxv8bf16 &&
13989  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
13990  return SDValue();
13991 
13992  if (DataVT.isFloatingPoint())
13993  InputVT = DAG.getValueType(HwSrcVt);
13994 
13995  SDValue SrcNew;
13996  if (Data.getValueType().isFloatingPoint())
13997  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
13998  else
13999  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
14000 
14001  SDValue Ops[] = { N->getOperand(0), // Chain
14002  SrcNew,
14003  N->getOperand(4), // Base
14004  N->getOperand(3), // Pg
14005  InputVT
14006  };
14007 
14008  return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
14009 }
14010 
14011 static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
14012  SDLoc DL(N);
14013 
14014  SDValue Data = N->getOperand(2);
14015  EVT DataVT = Data.getValueType();
14016  EVT PtrTy = N->getOperand(4).getValueType();
14017 
14018  if (DataVT == MVT::nxv8bf16 &&
14019  !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14020  return SDValue();
14021 
14022  if (DataVT.isFloatingPoint())
14023  Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
14024 
14025  auto *MINode = cast<MemIntrinsicSDNode>(N);
14026  return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
14027  DAG.getUNDEF(PtrTy), MINode->getOperand(3),
14028  MINode->getMemoryVT(), MINode->getMemOperand(),
14029  ISD::UNINDEXED, false, false);
14030 }
14031 
14032 /// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
14033 /// load store optimizer pass will merge them to store pair stores. This should
14034 /// be better than a movi to create the vector zero followed by a vector store
14035  /// if the zero constant is not re-used, since one instruction and one register
14036 /// live range will be removed.
14037 ///
14038 /// For example, the final generated code should be:
14039 ///
14040 /// stp xzr, xzr, [x0]
14041 ///
14042 /// instead of:
14043 ///
14044 /// movi v0.2d, #0
14045 /// str q0, [x0]
14046 ///
14047 static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14048  SDValue StVal = St.getValue();
14049  EVT VT = StVal.getValueType();
14050 
14051  // Avoid scalarizing zero splat stores for scalable vectors.
14052  if (VT.isScalableVector())
14053  return SDValue();
14054 
14055  // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
14056  // 2, 3 or 4 i32 elements.
14057  int NumVecElts = VT.getVectorNumElements();
14058  if (!(((NumVecElts == 2 || NumVecElts == 3) &&
14059  VT.getVectorElementType().getSizeInBits() == 64) ||
14060  ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
14061  VT.getVectorElementType().getSizeInBits() == 32)))
14062  return SDValue();
14063 
14064  if (StVal.getOpcode() != ISD::BUILD_VECTOR)
14065  return SDValue();
14066 
14067  // If the zero constant has more than one use then the vector store could be
14068  // better since the constant mov will be amortized and stp q instructions
14069  // should be able to be formed.
14070  if (!StVal.hasOneUse())
14071  return SDValue();
14072 
14073  // If the store is truncating then it's going down to i16 or smaller, which
14074  // means it can be implemented in a single store anyway.
14075  if (St.isTruncatingStore())
14076  return SDValue();
14077 
14078  // If the immediate offset of the address operand is too large for the stp
14079  // instruction, then bail out.
14080  if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
14081  int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
14082  if (Offset < -512 || Offset > 504)
14083  return SDValue();
14084  }
14085 
14086  for (int I = 0; I < NumVecElts; ++I) {
14087  SDValue EltVal = StVal.getOperand(I);
14088  if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
14089  return SDValue();
14090  }
14091 
14092  // Use a CopyFromReg WZR/XZR here to prevent
14093  // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
14094  SDLoc DL(&St);
14095  unsigned ZeroReg;
14096  EVT ZeroVT;
14097  if (VT.getVectorElementType().getSizeInBits() == 32) {
14098  ZeroReg = AArch64::WZR;
14099  ZeroVT = MVT::i32;
14100  } else {
14101  ZeroReg = AArch64::XZR;
14102  ZeroVT = MVT::i64;
14103  }
14104  SDValue SplatVal =
14105  DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
14106  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
14107 }
14108 
14109 /// Replace a splat of a scalar to a vector store by scalar stores of the scalar
14110 /// value. The load store optimizer pass will merge them to store pair stores.
14111 /// This has better performance than a splat of the scalar followed by a split
14112 /// vector store. Even if the stores are not merged it is four stores vs a dup,
14113 /// followed by an ext.b and two stores.
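/// For example (illustrative): storing a v4i32 splat of w1 to [x0] would
/// ideally end up as
///   stp w1, w1, [x0]
///   stp w1, w1, [x0, #8]
/// rather than a dup v0.4s, w1 followed by a str q0, [x0].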
14114 static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14115  SDValue StVal = St.getValue();
14116  EVT VT = StVal.getValueType();
14117 
14118  // Don't replace floating point stores, they possibly won't be transformed to
14119  // stp because of the store pair suppress pass.
14120  if (VT.isFloatingPoint())
14121  return SDValue();
14122 
14123  // We can express a splat as store pair(s) for 2 or 4 elements.
14124  unsigned NumVecElts = VT.getVectorNumElements();
14125  if (NumVecElts != 4 && NumVecElts != 2)
14126  return SDValue();
14127 
14128  // If the store is truncating then it's going down to i16 or smaller, which
14129  // means it can be implemented in a single store anyway.
14130  if (St.isTruncatingStore())
14131  return SDValue();
14132 
14133  // Check that this is a splat.
14134  // Make sure that each of the relevant vector element locations are inserted
14135  // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
14136  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
14137  SDValue SplatVal;
14138  for (unsigned I = 0; I < NumVecElts; ++I) {
14139  // Check for insert vector elements.
14140  if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
14141  return SDValue();
14142 
14143  // Check that same value is inserted at each vector element.
14144  if (I == 0)
14145  SplatVal = StVal.getOperand(1);
14146  else if (StVal.getOperand(1) != SplatVal)
14147  return SDValue();
14148 
14149  // Check insert element index.
14150  ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
14151  if (!CIndex)
14152  return SDValue();
14153  uint64_t IndexVal = CIndex->getZExtValue();
14154  if (IndexVal >= NumVecElts)
14155  return SDValue();
14156  IndexNotInserted.reset(IndexVal);
14157 
14158  StVal = StVal.getOperand(0);
14159  }
14160  // Check that all vector element locations were inserted to.
14161  if (IndexNotInserted.any())
14162  return SDValue();
14163 
14164  return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
14165 }
14166 
14167 static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
14168  SelectionDAG &DAG,
14169  const AArch64Subtarget *Subtarget) {
14170 
14171  StoreSDNode *S = cast<StoreSDNode>(N);
14172  if (S->isVolatile() || S->isIndexed())
14173  return SDValue();
14174 
14175  SDValue StVal = S->getValue();
14176  EVT VT = StVal.getValueType();
14177 
14178  if (!VT.isFixedLengthVector())
14179  return SDValue();
14180 
14181  // If we get a splat of zeros, convert this vector store to a store of
14182  // scalars. They will be merged into store pairs of xzr thereby removing one
14183  // instruction and one register.
14184  if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
14185  return ReplacedZeroSplat;
14186 
14187  // FIXME: The logic for deciding if an unaligned store should be split should
14188  // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
14189  // a call to that function here.
14190 
14191  if (!Subtarget->isMisaligned128StoreSlow())
14192  return SDValue();
14193 
14194  // Don't split at -Oz.
14195  if (DAG.getMachineFunction().getFunction().hasMinSize())
14196  return SDValue();
14197 
14198  // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
14199  // those up regresses performance on micro-benchmarks and olden/bh.
14200  if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
14201  return SDValue();
14202 
14203  // Split unaligned 16B stores. They are terrible for performance.
14204  // Don't split stores with alignment of 1 or 2. Code that uses clang vector
14205  // extensions can use this to mark that it does not want splitting to happen
14206  // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
14207  // eliminating alignment hazards is only 1 in 8 for alignment of 2.
14208  if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
14209  S->getAlignment() <= 2)
14210  return SDValue();
14211 
14212  // If we get a splat of a scalar, convert this vector store to a store of
14213  // scalars. They will be merged into store pairs thereby removing two
14214  // instructions.
14215  if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
14216  return ReplacedSplat;
14217 
14218  SDLoc DL(S);
14219 
14220  // Split VT into two.
14221  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
14222  unsigned NumElts = HalfVT.getVectorNumElements();
14223  SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
14224  DAG.getConstant(0, DL, MVT::i64));
14225  SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
14226  DAG.getConstant(NumElts, DL, MVT::i64));
14227  SDValue BasePtr = S->getBasePtr();
14228  SDValue NewST1 =
14229  DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
14230  S->getAlignment(), S->getMemOperand()->getFlags());
14231  SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
14232  DAG.getConstant(8, DL, MVT::i64));
14233  return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
14234  S->getPointerInfo(), S->getAlignment(),
14235  S->getMemOperand()->getFlags());
14236 }
14237 
14238 static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
14239  SDLoc DL(N);
14240  SDValue Op0 = N->getOperand(0);
14241  SDValue Op1 = N->getOperand(1);
14242  EVT ResVT = N->getValueType(0);
14243 
14244  // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
14245  if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
14246  if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
14247  SDValue X = Op0.getOperand(0).getOperand(0);
14248  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
14249  }
14250  }
14251 
14252  // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
14253  if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
14254  if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
14255  SDValue Z = Op1.getOperand(0).getOperand(1);
14256  return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
14257  }
14258  }
14259 
14260  return SDValue();
14261 }
14262 
14263 /// Target-specific DAG combine function for post-increment LD1 (lane) and
14264 /// post-increment LD1R.
14265 static SDValue performPostLD1Combine(SDNode *N,
14266  TargetLowering::DAGCombinerInfo &DCI,
14267  bool IsLaneOp) {
14268  if (DCI.isBeforeLegalizeOps())
14269  return SDValue();
14270 
14271  SelectionDAG &DAG = DCI.DAG;
14272  EVT VT = N->getValueType(0);
14273 
14274  if (VT.isScalableVector())
14275  return SDValue();
14276 
14277  unsigned LoadIdx = IsLaneOp ? 1 : 0;
14278  SDNode *LD = N->getOperand(LoadIdx).getNode();
14279  // If it is not a LOAD, we cannot do this combine.
14280  if (LD->getOpcode() != ISD::LOAD)
14281  return SDValue();
14282 
14283  // The vector lane must be a constant in the LD1LANE opcode.
14284  SDValue Lane;
14285  if (IsLaneOp) {
14286  Lane = N->getOperand(2);
14287  auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
14288  if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
14289  return SDValue();
14290  }
14291 
14292  LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
14293  EVT MemVT = LoadSDN->getMemoryVT();
14294  // Check if memory operand is the same type as the vector element.
14295  if (MemVT != VT.getVectorElementType())
14296  return SDValue();
14297 
14298  // Check if there are other uses. If so, do not combine as it will introduce
14299  // an extra load.
14300  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
14301  ++UI) {
14302  if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
14303  continue;
14304  if (*UI != N)
14305  return SDValue();
14306  }
14307 
14308  SDValue Addr = LD->getOperand(1);
14309  SDValue Vector = N->getOperand(0);
14310  // Search for a use of the address operand that is an increment.
14311  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
14312  Addr.getNode()->use_end(); UI != UE; ++UI) {
14313  SDNode *User = *UI;
14314  if (User->getOpcode() != ISD::ADD
14315  || UI.getUse().getResNo() != Addr.getResNo())
14316  continue;
14317 
14318  // If the increment is a constant, it must match the memory ref size.
14319  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
14320  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
14321  uint32_t IncVal = CInc->getZExtValue();
14322  unsigned NumBytes = VT.getScalarSizeInBits() / 8;
14323  if (IncVal != NumBytes)
14324  continue;
14325  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
14326  }
14327 
14328  // To avoid cycle construction make sure that neither the load nor the add
14329  // are predecessors to each other or the Vector.
14330  SmallPtrSet<const SDNode *, 32> Visited;
14331  SmallVector<const SDNode *, 16> Worklist;
14332  Visited.insert(Addr.getNode());
14333  Worklist.push_back(User);
14334  Worklist.push_back(LD);
14335  Worklist.push_back(Vector.getNode());
14336  if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
14337  SDNode::hasPredecessorHelper(User, Visited, Worklist))
14338  continue;
14339 
14340  SmallVector<SDValue, 8> Ops;
14341  Ops.push_back(LD->getOperand(0)); // Chain
14342  if (IsLaneOp) {
14343  Ops.push_back(Vector); // The vector to be inserted
14344  Ops.push_back(Lane); // The lane to be inserted in the vector
14345  }
14346  Ops.push_back(Addr);
14347  Ops.push_back(Inc);
14348 
14349  EVT Tys[3] = { VT, MVT::i64, MVT::Other };
14350  SDVTList SDTys = DAG.getVTList(Tys);
14351  unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
14352  SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
14353  MemVT,
14354  LoadSDN->getMemOperand());
14355 
14356  // Update the uses.
14357  SDValue NewResults[] = {
14358  SDValue(LD, 0), // The result of load
14359  SDValue(UpdN.getNode(), 2) // Chain
14360  };
14361  DCI.CombineTo(LD, NewResults);
14362  DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
14363  DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
14364 
14365  break;
14366  }
14367  return SDValue();
14368 }
14369 
14370 /// Simplify ``Addr`` given that the top byte of it is ignored by HW during
14371 /// address translation.
14372 static bool performTBISimplification(SDValue Addr,
14373  TargetLowering::DAGCombinerInfo &DCI,
14374  SelectionDAG &DAG) {
14375  APInt DemandedMask = APInt::getLowBitsSet(64, 56);
14376  KnownBits Known;
14377  TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
14378  !DCI.isBeforeLegalizeOps());
14379  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14380  if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
14381  DCI.CommitTargetLoweringOpt(TLO);
14382  return true;
14383  }
14384  return false;
14385 }
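// For example (illustrative): with top-byte-ignore in effect, an address
// computed as (and x, 0x00ffffffffffffff) can be simplified to just x, since
// only bits [55:0] are demanded for translation (see the mask above).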
14386 
14387 static SDValue performSTORECombine(SDNode *N,
14388  TargetLowering::DAGCombinerInfo &DCI,
14389  SelectionDAG &DAG,
14390  const AArch64Subtarget *Subtarget) {
14391  if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
14392  return Split;
14393 
14394  if (Subtarget->supportsAddressTopByteIgnored() &&
14395  performTBISimplification(N->getOperand(2), DCI, DAG))
14396  return SDValue(N, 0);
14397 
14398  return SDValue();
14399 }
14400 
14401 static SDValue performMaskedGatherScatterCombine(SDNode *N,
14402  TargetLowering::DAGCombinerInfo &DCI,
14403  SelectionDAG &DAG) {
14404  MaskedGatherScatterSDNode *MGS = cast<MaskedGatherScatterSDNode>(N);
14405  assert(MGS && "Can only combine gather load or scatter store nodes");
14406 
14407  SDLoc DL(MGS);
14408  SDValue Chain = MGS->getChain();
14409  SDValue Scale = MGS->getScale();
14410  SDValue Index = MGS->getIndex();
14411  SDValue Mask = MGS->getMask();
14412  SDValue BasePtr = MGS->getBasePtr();
14413  ISD::MemIndexType IndexType = MGS->getIndexType();
14414 
14415  EVT IdxVT = Index.getValueType();
14416 
14417  if (DCI.isBeforeLegalize()) {
14418  // SVE gather/scatter requires indices of i32/i64. Promote anything smaller
14419  // prior to legalisation so the result can be split if required.
14420  if ((IdxVT.getVectorElementType() == MVT::i8) ||
14421  (IdxVT.getVectorElementType() == MVT::i16)) {
14422  EVT NewIdxVT = IdxVT.changeVectorElementType(MVT::i32);
14423  if (MGS->isIndexSigned())
14424  Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index);
14425  else
14426  Index = DAG.getNode(ISD::ZERO_EXTEND, DL, NewIdxVT, Index);
14427 
14428  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(MGS)) {
14429  SDValue PassThru = MGT->getPassThru();
14430  SDValue Ops[] = { Chain, PassThru, Mask, BasePtr, Index, Scale };
14431  return DAG.getMaskedGather(DAG.getVTList(N->getValueType(0), MVT::Other),
14432  PassThru.getValueType(), DL, Ops,
14433  MGT->getMemOperand(),
14434  MGT->getIndexType(), MGT->getExtensionType());
14435  } else {
14436  auto *MSC = cast<MaskedScatterSDNode>(MGS);
14437  SDValue Data = MSC->getValue();
14438  SDValue Ops[] = { Chain, Data, Mask, BasePtr, Index, Scale };
14439  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
14440  MSC->getMemoryVT(), DL, Ops,
14441  MSC->getMemOperand(), IndexType,
14442  MSC->isTruncatingStore());
14443  }
14444  }
14445  }
14446 
14447  return SDValue();
14448 }
14449 
14450 /// Target-specific DAG combine function for NEON load/store intrinsics
14451 /// to merge base address updates.
14452 static SDValue performNEONPostLDSTCombine(SDNode *N,
14453  TargetLowering::DAGCombinerInfo &DCI,
14454  SelectionDAG &DAG) {
14455  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14456  return SDValue();
14457 
14458  unsigned AddrOpIdx = N->getNumOperands() - 1;
14459  SDValue Addr = N->getOperand(AddrOpIdx);
14460 
14461  // Search for a use of the address operand that is an increment.
14462  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
14463  UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
14464  SDNode *User = *UI;
14465  if (User->getOpcode() != ISD::ADD ||
14466  UI.getUse().getResNo() != Addr.getResNo())
14467  continue;
14468 
14469  // Check that the add is independent of the load/store. Otherwise, folding
14470  // it would create a cycle.
14471  SmallPtrSet<const SDNode *, 32> Visited;
14472  SmallVector<const SDNode *, 16> Worklist;
14473  Visited.insert(Addr.getNode());
14474  Worklist.push_back(N);
14475  Worklist.push_back(User);
14476  if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
14477  SDNode::hasPredecessorHelper(User, Visited, Worklist))
14478  continue;
14479 
14480  // Find the new opcode for the updating load/store.
14481  bool IsStore = false;
14482  bool IsLaneOp = false;
14483  bool IsDupOp = false;
14484  unsigned NewOpc = 0;
14485  unsigned NumVecs = 0;
14486  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
14487  switch (IntNo) {
14488  default: llvm_unreachable("unexpected intrinsic for Neon base update");
14489  case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
14490  NumVecs = 2; break;
14491  case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
14492  NumVecs = 3; break;
14493  case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
14494  NumVecs = 4; break;
14495  case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
14496  NumVecs = 2; IsStore = true; break;
14497  case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
14498  NumVecs = 3; IsStore = true; break;
14499  case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
14500  NumVecs = 4; IsStore = true; break;
14501  case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
14502  NumVecs = 2; break;
14503  case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
14504  NumVecs = 3; break;
14505  case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
14506  NumVecs = 4; break;
14507  case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
14508  NumVecs = 2; IsStore = true; break;
14509  case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
14510  NumVecs = 3; IsStore = true; break;
14511  case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
14512  NumVecs = 4; IsStore = true; break;
14513  case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
14514  NumVecs = 2; IsDupOp = true; break;
14515  case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
14516  NumVecs = 3; IsDupOp = true; break;
14517  case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
14518  NumVecs = 4; IsDupOp = true; break;
14519  case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
14520  NumVecs = 2; IsLaneOp = true; break;
14521  case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
14522  NumVecs = 3; IsLaneOp = true; break;
14523  case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
14524  NumVecs = 4; IsLaneOp = true; break;
14525  case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
14526  NumVecs = 2; IsStore = true; IsLaneOp = true; break;
14527  case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
14528  NumVecs = 3; IsStore = true; IsLaneOp = true; break;
14529  case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
14530  NumVecs = 4; IsStore = true; IsLaneOp = true; break;
14531  }
14532 
14533  EVT VecTy;
14534  if (IsStore)
14535  VecTy = N->getOperand(2).getValueType();
14536  else
14537  VecTy = N->getValueType(0);
14538 
14539  // If the increment is a constant, it must match the memory ref size.
14540  SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
14541  if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
14542  uint32_t IncVal = CInc->getZExtValue();
14543  unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
14544  if (IsLaneOp || IsDupOp)
14545  NumBytes /= VecTy.getVectorNumElements();
14546  if (IncVal != NumBytes)
14547  continue;
14548  Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
14549  }
14550  SmallVector<SDValue, 8> Ops;
14551  Ops.push_back(N->getOperand(0)); // Incoming chain
14552  // Load lane and store have vector list as input.
14553  if (IsLaneOp || IsStore)
14554  for (unsigned i = 2; i < AddrOpIdx; ++i)
14555  Ops.push_back(N->getOperand(i));
14556  Ops.push_back(Addr); // Base register
14557  Ops.push_back(Inc);
14558 
14559  // Return Types.
14560  EVT Tys[6];
14561  unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
14562  unsigned n;
14563  for (n = 0; n < NumResultVecs; ++n)
14564  Tys[n] = VecTy;
14565  Tys[n++] = MVT::i64; // Type of write back register
14566  Tys[n] = MVT::Other; // Type of the chain
14567  SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
14568 
14569  MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
14570  SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
14571  MemInt->getMemoryVT(),
14572  MemInt->getMemOperand());
14573 
14574  // Update the uses.
14575  std::vector<SDValue> NewResults;
14576  for (unsigned i = 0; i < NumResultVecs; ++i) {
14577  NewResults.push_back(SDValue(UpdN.getNode(), i));
14578  }
14579  NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
14580  DCI.CombineTo(N, NewResults);
14581  DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
14582 
14583  break;
14584  }
14585  return SDValue();
14586 }
14587 
14588 // Checks to see if the value is the prescribed width and returns information
14589 // about its extension mode.
14590 static
14591 bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
14592  ExtType = ISD::NON_EXTLOAD;
14593  switch(V.getNode()->getOpcode()) {
14594  default:
14595  return false;
14596  case ISD::LOAD: {
14597  LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
14598  if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
14599  || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
14600  ExtType = LoadNode->getExtensionType();
14601  return true;
14602  }
14603  return false;
14604  }
14605  case ISD::AssertSext: {
14606  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
14607  if ((TypeNode->getVT() == MVT::i8 && width == 8)
14608  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
14609  ExtType = ISD::SEXTLOAD;
14610  return true;
14611  }
14612  return false;
14613  }
14614  case ISD::AssertZext: {
14615  VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
14616  if ((TypeNode->getVT() == MVT::i8 && width == 8)
14617  || (TypeNode->getVT() == MVT::i16 && width == 16)) {
14618  ExtType = ISD::ZEXTLOAD;
14619  return true;
14620  }
14621  return false;
14622  }
14623  case ISD::Constant:
14624  case ISD::TargetConstant: {
14625  return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
14626  1LL << (width - 1);
14627  }
14628  }
14629 
14630  return true;
14631 }
14632 
14633 // This function does a whole lot of voodoo to determine if the tests are
14634 // equivalent without and with a mask. Essentially what happens is that given a
14635 // DAG resembling:
14636 //
14637 // +-------------+ +-------------+ +-------------+ +-------------+
14638 // | Input | | AddConstant | | CompConstant| | CC |
14639 // +-------------+ +-------------+ +-------------+ +-------------+
14640 // | | | |
14641 // V V | +----------+
14642 // +-------------+ +----+ | |
14643 // | ADD | |0xff| | |
14644 // +-------------+ +----+ | |
14645 // | | | |
14646 // V V | |
14647 // +-------------+ | |
14648 // | AND | | |
14649 // +-------------+ | |
14650 // | | |
14651 // +-----+ | |
14652 // | | |
14653 // V V V
14654 // +-------------+
14655 // | CMP |
14656 // +-------------+
14657 //
14658 // The AND node may be safely removed for some combinations of inputs. In
14659 // particular we need to take into account the extension type of the Input,
14660 // the exact values of AddConstant, CompConstant, and CC, along with the nominal
14661  // width of the input (this can work for any width of input, the above graph is
14662  // specific to 8 bits).
14663 //
14664 // The specific equations were worked out by generating output tables for each
14665  // AArch64CC value in terms of AddConstant (w1) and CompConstant (w2). The
14666 // problem was simplified by working with 4 bit inputs, which means we only
14667 // needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
14668 // extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
14669 // patterns present in both extensions (0,7). For every distinct set of
14670 // AddConstant and CompConstants bit patterns we can consider the masked and
14671 // unmasked versions to be equivalent if the result of this function is true for
14672  // all 16 distinct bit patterns for the current extension type of Input (w0).
14673 //
14674 // sub w8, w0, w1
14675 // and w10, w8, #0x0f
14676 // cmp w8, w2
14677 // cset w9, AArch64CC
14678 // cmp w10, w2
14679 // cset w11, AArch64CC
14680 // cmp w9, w11
14681 // cset w0, eq
14682 // ret
14683 //
14684 // Since the above function shows when the outputs are equivalent it defines
14685 // when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
14686  // would be expensive to run during compiles. The equations below were written
14687  // in a test harness that confirmed they gave outputs equivalent to the above
14688  // function for all inputs, so they can be used to determine if the removal is
14689  // legal instead.
14690 //
14691  // isEquivalentMaskless() is the code for testing if the AND can be removed,
14692  // factored out of the DAG recognition because the DAG can take several forms.
14693 
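// A small worked instance (illustrative, 8-bit case): with a zero-extended
// input, AddConstant == 0 and CC == EQ, the sum stays within 8 bits, so
// masking it with 0xff changes nothing and the masked and unmasked compares
// are identical; the equations below generalise this kind of reasoning.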
14694 static bool isEquivalentMaskless(unsigned CC, unsigned width,
14695  ISD::LoadExtType ExtType, int AddConstant,
14696  int CompConstant) {
14697  // By being careful about our equations and only writing them in terms of
14698  // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
14699  // make them generally applicable to all bit widths.
14700  int MaxUInt = (1 << width);
14701 
14702  // For the purposes of these comparisons sign extending the type is
14703  // equivalent to zero extending the add and displacing it by half the integer
14704  // width. Provided we are careful and make sure our equations are valid over
14705  // the whole range we can just adjust the input and avoid writing equations
14706  // for sign extended inputs.
14707  if (ExtType == ISD::SEXTLOAD)
14708  AddConstant -= (1 << (width-1));
14709 
14710  switch(CC) {
14711  case AArch64CC::LE:
14712  case AArch64CC::GT:
14713  if ((AddConstant == 0) ||
14714  (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
14715  (AddConstant >= 0 && CompConstant < 0) ||
14716  (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
14717  return true;
14718  break;
14719  case AArch64CC::LT:
14720  case AArch64CC::GE:
14721  if ((AddConstant == 0) ||
14722  (AddConstant >= 0 && CompConstant <= 0) ||
14723  (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
14724  return true;
14725  break;
14726  case AArch64CC::HI:
14727  case AArch64CC::LS:
14728  if ((AddConstant >= 0 && CompConstant < 0) ||
14729  (AddConstant <= 0 && CompConstant >= -1 &&
14730  CompConstant < AddConstant + MaxUInt))
14731  return true;
14732  break;
14733  case AArch64CC::PL:
14734  case AArch64CC::MI:
14735  if ((AddConstant == 0) ||
14736  (AddConstant > 0 && CompConstant <= 0) ||
14737  (AddConstant < 0 && CompConstant <= AddConstant))
14738  return true;
14739  break;
14740  case AArch64CC::LO:
14741  case AArch64CC::HS:
14742  if ((AddConstant >= 0 && CompConstant <= 0) ||
14743  (AddConstant <= 0 && CompConstant >= 0 &&
14744  CompConstant <= AddConstant + MaxUInt))
14745  return true;
14746  break;
14747  case AArch64CC::EQ:
14748  case AArch64CC::NE:
14749  if ((AddConstant > 0 && CompConstant < 0) ||
14750  (AddConstant < 0 && CompConstant >= 0 &&
14751  CompConstant < AddConstant + MaxUInt) ||
14752  (AddConstant >= 0 && CompConstant >= 0 &&
14753  CompConstant >= AddConstant) ||
14754  (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
14755  return true;
14756  break;
14757  case AArch64CC::VS:
14758  case AArch64CC::VC:
14759  case AArch64CC::AL:
14760  case AArch64CC::NV:
14761  return true;
14762  case AArch64CC::Invalid:
14763  break;
14764  }
14765 
14766  return false;
14767 }
14768 
14769 static
14772  SelectionDAG &DAG, unsigned CCIndex,
14773  unsigned CmpIndex) {
14774  unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
14775  SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
14776  unsigned CondOpcode = SubsNode->getOpcode();
14777 
14778  if (CondOpcode != AArch64ISD::SUBS)
14779  return SDValue();
14780 
14781  // There is a SUBS feeding this condition. Is it fed by a mask we can
14782  // use?
14783 
14784  SDNode *AndNode = SubsNode->getOperand(0).getNode();
14785  unsigned MaskBits = 0;
14786 
14787  if (AndNode->getOpcode() != ISD::AND)
14788  return SDValue();
14789 
14790  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
14791  uint32_t CNV = CN->getZExtValue();
14792  if (CNV == 255)
14793  MaskBits = 8;
14794  else if (CNV == 65535)
14795  MaskBits = 16;
14796  }
14797 
14798  if (!MaskBits)
14799  return SDValue();
14800 
14801  SDValue AddValue = AndNode->getOperand(0);
14802 
14803  if (AddValue.getOpcode() != ISD::ADD)
14804  return SDValue();
14805 
14806  // The basic dag structure is correct, grab the inputs and validate them.
14807 
14808  SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
14809  SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
14810  SDValue SubsInputValue = SubsNode->getOperand(1);
14811 
14812  // The mask is present and the provenance of all the values is a smaller type,
14813  // let's see if the mask is superfluous.
14814 
14815  if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
14816  !isa<ConstantSDNode>(SubsInputValue.getNode()))
14817  return SDValue();
14818 
14819  ISD::LoadExtType ExtType;
14820 
14821  if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
14822  !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
14823  !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
14824  return SDValue();
14825 
14826  if(!isEquivalentMaskless(CC, MaskBits, ExtType,
14827  cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
14828  cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
14829  return SDValue();
14830 
14831  // The AND is not necessary, remove it.
14832 
14833  SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
14834  SubsNode->getValueType(1));
14835  SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
14836 
14837  SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
14838  DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
14839 
14840  return SDValue(N, 0);
14841 }
14842 
14843 // Optimize compare with zero and branch.
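// For example (illustrative): (brcond ne, (SUBS x, 0)) becomes (CBNZ x, dest)
// and the eq form becomes (CBZ x, dest), provided only the flag result of the
// SUBS is used.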
14844 static SDValue performBRCONDCombine(SDNode *N,
14845  TargetLowering::DAGCombinerInfo &DCI,
14846  SelectionDAG &DAG) {
14847  MachineFunction &MF = DAG.getMachineFunction();
14848  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
14849  // will not be produced, as they are conditional branch instructions that do
14850  // not set flags.
14851  if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
14852  return SDValue();
14853 
14854  if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
14855  N = NV.getNode();
14856  SDValue Chain = N->getOperand(0);
14857  SDValue Dest = N->getOperand(1);
14858  SDValue CCVal = N->getOperand(2);
14859  SDValue Cmp = N->getOperand(3);
14860 
14861  assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
14862  unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
14863  if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
14864  return SDValue();
14865 
14866  unsigned CmpOpc = Cmp.getOpcode();
14867  if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
14868  return SDValue();
14869 
14870  // Only attempt folding if there is only one use of the flag and no use of the
14871  // value.
14872  if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
14873  return SDValue();
14874 
14875  SDValue LHS = Cmp.getOperand(0);
14876  SDValue RHS = Cmp.getOperand(1);
14877 
14878  assert(LHS.getValueType() == RHS.getValueType() &&
14879  "Expected the value type to be the same for both operands!");
14880  if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
14881  return SDValue();
14882 
14883  if (isNullConstant(LHS))
14884  std::swap(LHS, RHS);
14885 
14886  if (!isNullConstant(RHS))
14887  return SDValue();
14888 
14889  if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
14890  LHS.getOpcode() == ISD::SRL)
14891  return SDValue();
14892 
14893  // Fold the compare into the branch instruction.
14894  SDValue BR;
14895  if (CC == AArch64CC::EQ)
14896  BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
14897  else
14898  BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
14899 
14900  // Do not add new nodes to DAG combiner worklist.
14901  DCI.CombineTo(N, BR, false);
14902 
14903  return SDValue();
14904 }
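// (Illustrative) Example of the combine above: a BRCOND on NE fed by
// (SUBS x, 0) becomes (CBNZ x, dest), and the EQ form becomes (CBZ x, dest),
// provided the flags produced by the SUBS/ADDS have no other users.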
14905 
14906 // Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
14907 // as well as whether the test should be inverted. This code is required to
14908 // catch these cases (as opposed to standard dag combines) because
14909 // AArch64ISD::TBZ is matched during legalization.
14910 static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
14911  SelectionDAG &DAG) {
14912 
14913  if (!Op->hasOneUse())
14914  return Op;
14915 
14916  // We don't handle undef/constant-fold cases below, as they should have
14917  // already been taken care of (e.g. and of 0, test of undefined shifted bits,
14918  // etc.)
14919 
14920  // (tbz (trunc x), b) -> (tbz x, b)
14921  // This case is just here to enable more of the below cases to be caught.
14922  if (Op->getOpcode() == ISD::TRUNCATE &&
14923  Bit < Op->getValueType(0).getSizeInBits()) {
14924  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14925  }
14926 
14927  // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
14928  if (Op->getOpcode() == ISD::ANY_EXTEND &&
14929  Bit < Op->getOperand(0).getValueSizeInBits()) {
14930  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14931  }
14932 
14933  if (Op->getNumOperands() != 2)
14934  return Op;
14935 
14936  auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
14937  if (!C)
14938  return Op;
14939 
14940  switch (Op->getOpcode()) {
14941  default:
14942  return Op;
14943 
14944  // (tbz (and x, m), b) -> (tbz x, b)
14945  case ISD::AND:
14946  if ((C->getZExtValue() >> Bit) & 1)
14947  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14948  return Op;
14949 
14950  // (tbz (shl x, c), b) -> (tbz x, b-c)
14951  case ISD::SHL:
14952  if (C->getZExtValue() <= Bit &&
14953  (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
14954  Bit = Bit - C->getZExtValue();
14955  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14956  }
14957  return Op;
14958 
14959  // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
14960  case ISD::SRA:
14961  Bit = Bit + C->getZExtValue();
14962  if (Bit >= Op->getValueType(0).getSizeInBits())
14963  Bit = Op->getValueType(0).getSizeInBits() - 1;
14964  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14965 
14966  // (tbz (srl x, c), b) -> (tbz x, b+c)
14967  case ISD::SRL:
14968  if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
14969  Bit = Bit + C->getZExtValue();
14970  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14971  }
14972  return Op;
14973 
14974  // (tbz (xor x, -1), b) -> (tbnz x, b)
14975  case ISD::XOR:
14976  if ((C->getZExtValue() >> Bit) & 1)
14977  Invert = !Invert;
14978  return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
14979  }
14980 }
14981 
14982 // Optimize test single bit zero/non-zero and branch.
14983 static SDValue performTBZCombine(SDNode *N,
14984  TargetLowering::DAGCombinerInfo &DCI,
14985  SelectionDAG &DAG) {
14986  unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
14987  bool Invert = false;
14988  SDValue TestSrc = N->getOperand(1);
14989  SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
14990 
14991  if (TestSrc == NewTestSrc)
14992  return SDValue();
14993 
14994  unsigned NewOpc = N->getOpcode();
14995  if (Invert) {
14996  if (NewOpc == AArch64ISD::TBZ)
14997  NewOpc = AArch64ISD::TBNZ;
14998  else {
14999  assert(NewOpc == AArch64ISD::TBNZ);
15000  NewOpc = AArch64ISD::TBZ;
15001  }
15002  }
15003 
15004  SDLoc DL(N);
15005  return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
15006  DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
15007 }
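// (Illustrative) Example of the combine above: (tbz (shl x, 2), 3) tests what
// was originally bit 1 of x and becomes (tbz x, 1), while (tbz (xor x, -1), b)
// becomes (tbnz x, b).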
15008 
15009 // vselect (v1i1 setcc) ->
15010 // vselect (v1iXX setcc) (XX is the size of the compared operand type)
15011 // FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
15012 // condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
15013 // such VSELECT.
15014 static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
15015  SDValue N0 = N->getOperand(0);
15016  EVT CCVT = N0.getValueType();
15017 
15018  if (N0.getOpcode() != ISD::SETCC || CCVT.getVectorNumElements() != 1 ||
15019  CCVT.getVectorElementType() != MVT::i1)
15020  return SDValue();
15021 
15022  EVT ResVT = N->getValueType(0);
15023  EVT CmpVT = N0.getOperand(0).getValueType();
15024  // Only combine when the result type is of the same size as the compared
15025  // operands.
15026  if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
15027  return SDValue();
15028 
15029  SDValue IfTrue = N->getOperand(1);
15030  SDValue IfFalse = N->getOperand(2);
15031  SDValue SetCC =
15032  DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
15033  N0.getOperand(0), N0.getOperand(1),
15034  cast<CondCodeSDNode>(N0.getOperand(2))->get());
15035  return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
15036  IfTrue, IfFalse);
15037 }
15038 
15039 /// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
15040 /// the compare-mask instructions rather than going via NZCV, even if LHS and
15041 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
15042 /// with a vector one followed by a DUP shuffle on the result.
15043 static SDValue performSelectCombine(SDNode *N,
15044  TargetLowering::DAGCombinerInfo &DCI) {
15045  SelectionDAG &DAG = DCI.DAG;
15046  SDValue N0 = N->getOperand(0);
15047  EVT ResVT = N->getValueType(0);
15048 
15049  if (N0.getOpcode() != ISD::SETCC)
15050  return SDValue();
15051 
15052  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
15053  // scalar SetCCResultType. We also don't expect vectors, because we assume
15054  // that selects fed by vector SETCCs are canonicalized to VSELECT.
15055  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
15056  "Scalar-SETCC feeding SELECT has unexpected result type!");
15057 
15058  // If NumMaskElts == 0, the comparison is larger than the select result. The
15059  // largest real NEON comparison is 64-bits per lane, which means the result is
15060  // at most 32-bits and an illegal vector. Just bail out for now.
15061  EVT SrcVT = N0.getOperand(0).getValueType();
15062 
15063  // Don't try to do this optimization when the setcc itself has i1 operands.
15064  // There are no legal vectors of i1, so this would be pointless.
15065  if (SrcVT == MVT::i1)
15066  return SDValue();
15067 
15068  int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
15069  if (!ResVT.isVector() || NumMaskElts == 0)
15070  return SDValue();
15071 
15072  SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
15073  EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
15074 
15075  // Also bail out if the vector CCVT isn't the same size as ResVT.
15076  // This can happen if the SETCC operand size doesn't divide the ResVT size
15077  // (e.g., f64 vs v3f32).
15078  if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
15079  return SDValue();
15080 
15081  // Make sure we didn't create illegal types, if we're not supposed to.
15082  assert(DCI.isBeforeLegalize() ||
15083  DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
15084 
15085  // First perform a vector comparison, where lane 0 is the one we're interested
15086  // in.
15087  SDLoc DL(N0);
15088  SDValue LHS =
15089  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
15090  SDValue RHS =
15091  DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
15092  SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
15093 
15094  // Now duplicate the comparison mask we want across all other lanes.
15095  SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
15096  SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
15097  Mask = DAG.getNode(ISD::BITCAST, DL,
15098  ResVT.changeVectorElementTypeToInteger(), Mask);
15099 
15100  return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
15101 }
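// (Illustrative) Example of the combine above: for a select whose condition is
// a scalar "setcc f64 a, b" and whose results are v2f64, the compare is redone
// as a v2f64 SETCC, lane 0 of the mask is broadcast with a DUP-style shuffle,
// and the select is emitted as a normal vector select on that mask.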
15102 
15103 /// Get rid of unnecessary NVCASTs (that don't change the type).
15104 static SDValue performNVCASTCombine(SDNode *N) {
15105  if (N->getValueType(0) == N->getOperand(0).getValueType())
15106  return N->getOperand(0);
15107 
15108  return SDValue();
15109 }
15110 
15111 // If all users of the globaladdr are of the form (globaladdr + constant), find
15112 // the smallest constant, fold it into the globaladdr's offset and rewrite the
15113 // globaladdr as (globaladdr + constant) - constant.
15114 static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
15115  const AArch64Subtarget *Subtarget,
15116  const TargetMachine &TM) {
15117  auto *GN = cast<GlobalAddressSDNode>(N);
15118  if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
15119  AArch64II::MO_NO_FLAG)
15120  return SDValue();
15121 
15122  uint64_t MinOffset = -1ull;
15123  for (SDNode *N : GN->uses()) {
15124  if (N->getOpcode() != ISD::ADD)
15125  return SDValue();
15126  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
15127  if (!C)
15128  C = dyn_cast<ConstantSDNode>(N->getOperand(1));
15129  if (!C)
15130  return SDValue();
15131  MinOffset = std::min(MinOffset, C->getZExtValue());
15132  }
15133  uint64_t Offset = MinOffset + GN->getOffset();
15134 
15135  // Require that the new offset is larger than the existing one. Otherwise, we
15136  // can end up oscillating between two possible DAGs, for example,
15137  // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
15138  if (Offset <= uint64_t(GN->getOffset()))
15139  return SDValue();
15140 
15141  // Check whether folding this offset is legal. It must not go out of bounds of
15142  // the referenced object to avoid violating the code model, and must be
15143  // smaller than 2^21 because this is the largest offset expressible in all
15144  // object formats.
15145  //
15146  // This check also prevents us from folding negative offsets, which will end
15147  // up being treated in the same way as large positive ones. They could also
15148  // cause code model violations, and aren't really common enough to matter.
15149  if (Offset >= (1 << 21))
15150  return SDValue();
15151 
15152  const GlobalValue *GV = GN->getGlobal();
15153  Type *T = GV->getValueType();
15154  if (!T->isSized() ||
15155  Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
15156  return SDValue();
15157 
15158  SDLoc DL(GN);
15159  SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
15160  return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
15161  DAG.getConstant(MinOffset, DL, MVT::i64));
15162 }
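// (Illustrative) Example of the rewrite above: if every use of @g has the form
// (add (globaladdr @g), C) and the smallest C is 8, the address is re-emitted
// as (sub (globaladdr @g + 8), 8) so the remaining per-use additions fold into
// smaller offsets.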
15163 
15164 // Turns the vector of indices into a vector of byte offsets by scaling Offset
15165 // by (BitWidth / 8).
15166 static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
15167  SDLoc DL, unsigned BitWidth) {
15168  assert(Offset.getValueType().isScalableVector() &&
15169  "This method is only for scalable vectors of offsets");
15170 
15171  SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
15172  SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
15173 
15174  return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
15175 }
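// (Illustrative) With 32-bit elements, an index vector <0, 1, 2, ...> becomes
// the byte-offset vector <0, 4, 8, ...>: each index is shifted left by
// log2(32 / 8) = 2.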
15176 
15177 /// Check if the value of \p OffsetInBytes can be used as an immediate for
15178 /// the gather load/prefetch and scatter store instructions with vector base and
15179 /// immediate offset addressing mode:
15180 ///
15181 /// [<Zn>.[S|D]{, #<imm>}]
15182 ///
15183 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
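///
/// For example (illustrative), with 64-bit elements (sizeof(<T>) = 8) the
/// accepted byte offsets are 0, 8, 16, ..., 248; 12 is rejected because it is
/// not a multiple of 8, and 256 because 256 / 8 > 31.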
15184 
15185 inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
15186  unsigned ScalarSizeInBytes) {
15187  // The immediate is not a multiple of the scalar size.
15188  if (OffsetInBytes % ScalarSizeInBytes)
15189  return false;
15190 
15191  // The immediate is out of range.
15192  if (OffsetInBytes / ScalarSizeInBytes > 31)
15193  return false;
15194 
15195  return true;
15196 }
15197 
15198 /// Check if the value of \p Offset represents a valid immediate for the SVE
15199 /// gather load/prefetch and scatter store instructions with vector base and
15200 /// immediate offset addressing mode:
15201 ///
15202 /// [<Zn>.[S|D]{, #<imm>}]
15203 ///
15204 /// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
15205 static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
15206  unsigned ScalarSizeInBytes) {
15207  ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
15208  return OffsetConst && isValidImmForSVEVecImmAddrMode(
15209  OffsetConst->getZExtValue(), ScalarSizeInBytes);
15210 }
15211 
15212 static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
15213  unsigned Opcode,
15214  bool OnlyPackedOffsets = true) {
15215  const SDValue Src = N->getOperand(2);
15216  const EVT SrcVT = Src->getValueType(0);
15217  assert(SrcVT.isScalableVector() &&
15218  "Scatter stores are only possible for SVE vectors");
15219 
15220  SDLoc DL(N);
15221  MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
15222 
15223  // Make sure that source data will fit into an SVE register
15224  if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
15225  return SDValue();
15226 
15227  // For FPs, ACLE only supports _packed_ single and double precision types.
15228  if (SrcElVT.isFloatingPoint())
15229  if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
15230  return SDValue();
15231 
15232  // Depending on the addressing mode, this is either a pointer or a vector of
15233  // pointers (that fits into one register)
15234  SDValue Base = N->getOperand(4);
15235  // Depending on the addressing mode, this is either a single offset or a
15236  // vector of offsets (that fits into one register)
15237  SDValue Offset = N->getOperand(5);
15238 
15239  // For "scalar + vector of indices", just scale the indices. This only
15240  // applies to non-temporal scatters because there's no instruction that takes
15241  // indices.
15242  if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
15243  Offset =
15244  getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
15245  Opcode = AArch64ISD::SSTNT1_PRED;
15246  }
15247 
15248  // In the case of non-temporal scatter stores there's only one SVE instruction
15249  // per data-size: "scalar + vector", i.e.
15250  // * stnt1{b|h|w|d} { z0.s }, p0, [z0.s, x0]
15251  // Since we do have intrinsics that allow the arguments to be in a different
15252  // order, we may need to swap them to match the spec.
15253  if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
15254  std::swap(Base, Offset);
15255 
15256  // SST1_IMM requires that the offset is an immediate that is:
15257  // * a multiple of #SizeInBytes,
15258  // * in the range [0, 31 x #SizeInBytes],
15259  // where #SizeInBytes is the size in bytes of the stored items. For
15260  // immediates outside that range and non-immediate scalar offsets use SST1 or
15261  // SST1_UXTW instead.
15262  if (Opcode == AArch64ISD::SST1_IMM_PRED) {
15263  if (!isValidImmForSVEVecImmAddrMode(Offset,
15264  SrcVT.getScalarSizeInBits() / 8)) {
15265  if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
15266  Opcode = AArch64ISD::SST1_UXTW_PRED;
15267  else
15268  Opcode = AArch64ISD::SST1_PRED;
15269 
15270  std::swap(Base, Offset);
15271  }
15272  }
15273 
15274  auto &TLI = DAG.getTargetLoweringInfo();
15275  if (!TLI.isTypeLegal(Base.getValueType()))
15276  return SDValue();
15277 
15278  // Some scatter store variants allow unpacked offsets, but only as nxv2i32
15279  // vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
15280  // nxv2i64. Legalize accordingly.
15281  if (!OnlyPackedOffsets &&
15282  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
15283  Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
15284 
15285  if (!TLI.isTypeLegal(Offset.getValueType()))
15286  return SDValue();
15287 
15288  // Source value type that is representable in hardware
15289  EVT HwSrcVt = getSVEContainerType(SrcVT);
15290 
15291  // Keep the original type of the input data to store - this is needed to be
15292  // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
15293  // FP values we want the integer equivalent, so just use HwSrcVt.
15294  SDValue InputVT = DAG.getValueType(SrcVT);
15295  if (SrcVT.isFloatingPoint())
15296  InputVT = DAG.getValueType(HwSrcVt);
15297 
15298  SDVTList VTs = DAG.getVTList(MVT::Other);
15299  SDValue SrcNew;
15300 
15301  if (Src.getValueType().isFloatingPoint())
15302  SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
15303  else
15304  SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
15305 
15306  SDValue Ops[] = {N->getOperand(0), // Chain
15307  SrcNew,
15308  N->getOperand(3), // Pg
15309  Base,
15310  Offset,
15311  InputVT};
15312 
15313  return DAG.getNode(Opcode, DL, VTs, Ops);
15314 }
15315 
15316 static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
15317  unsigned Opcode,
15318  bool OnlyPackedOffsets = true) {
15319  const EVT RetVT = N->getValueType(0);
15320  assert(RetVT.isScalableVector() &&
15321  "Gather loads are only possible for SVE vectors");
15322 
15323  SDLoc DL(N);
15324 
15325  // Make sure that the loaded data will fit into an SVE register
15326  if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
15327  return SDValue();
15328 
15329  // Depending on the addressing mode, this is either a pointer or a vector of
15330  // pointers (that fits into one register)
15331  SDValue Base = N->getOperand(3);
15332  // Depending on the addressing mode, this is either a single offset or a
15333  // vector of offsets (that fits into one register)
15334  SDValue Offset = N->getOperand(4);
15335 
15336  // For "scalar + vector of indices", just scale the indices. This only
15337  // applies to non-temporal gathers because there's no instruction that takes
15338  // indices.
15339  if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
15340  Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
15341  RetVT.getScalarSizeInBits());
15342  Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
15343  }
15344 
15345  // In the case of non-temporal gather loads there's only one SVE instruction
15346  // per data-size: "scalar + vector", i.e.
15347  // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
15348  // Since we do have intrinsics that allow the arguments to be in a different
15349  // order, we may need to swap them to match the spec.
15350  if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
15351  Offset.getValueType().isVector())
15352  std::swap(Base, Offset);
15353 
15354  // GLD{FF}1_IMM requires that the offset is an immediate that is:
15355  // * a multiple of #SizeInBytes,
15356  // * in the range [0, 31 x #SizeInBytes],
15357  // where #SizeInBytes is the size in bytes of the loaded items. For
15358  // immediates outside that range and non-immediate scalar offsets use
15359  // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
15360  if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
15363  RetVT.getScalarSizeInBits() / 8)) {
15365  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
15368  else
15369  Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
15372 
15373  std::swap(Base, Offset);
15374  }
15375  }
15376 
15377  auto &TLI = DAG.getTargetLoweringInfo();
15378  if (!TLI.isTypeLegal(Base.getValueType()))
15379  return SDValue();
15380 
15381  // Some gather load variants allow unpacked offsets, but only as nxv2i32
15382  // vectors. These are implicitly sign (sxtw) or zero (uxtw) extended to
15383  // nxv2i64. Legalize accordingly.
15384  if (!OnlyPackedOffsets &&
15385  Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
15386  Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
15387 
15388  // Return value type that is representable in hardware
15389  EVT HwRetVt = getSVEContainerType(RetVT);
15390 
15391  // Keep the original output value type around - this is needed to be able to
15392  // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
15393  // values we want the integer equivalent, so just use HwRetVT.
15394  SDValue OutVT = DAG.getValueType(RetVT);
15395  if (RetVT.isFloatingPoint())
15396  OutVT = DAG.getValueType(HwRetVt);
15397 
15398  SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
15399  SDValue Ops[] = {N->getOperand(0), // Chain
15400  N->getOperand(2), // Pg
15401  Base, Offset, OutVT};
15402 
15403  SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
15404  SDValue LoadChain = SDValue(Load.getNode(), 1);
15405 
15406  if (RetVT.isInteger() && (RetVT != HwRetVt))
15407  Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
15408 
15409  // If the original return value was FP, bitcast accordingly. Doing it here
15410  // means that we can avoid adding TableGen patterns for FPs.
15411  if (RetVT.isFloatingPoint())
15412  Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
15413 
15414  return DAG.getMergeValues({Load, LoadChain}, DL);
15415 }
15416 
15417 static SDValue
15418 performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15419  SelectionDAG &DAG) {
15420  SDLoc DL(N);
15421  SDValue Src = N->getOperand(0);
15422  unsigned Opc = Src->getOpcode();
15423 
15424  // Sign extend of an unsigned unpack -> signed unpack
15425  if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
15426 
15427  unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
15428  : AArch64ISD::SUNPKLO;
15429 
15430  // Push the sign extend to the operand of the unpack
15431  // This is necessary where, for example, the operand of the unpack
15432  // is another unpack:
15433  // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
15434  // ->
15435  // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
15436  // ->
15437  // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
15438  SDValue ExtOp = Src->getOperand(0);
15439  auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
15440  EVT EltTy = VT.getVectorElementType();
15441  (void)EltTy;
15442 
15443  assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
15444  "Sign extending from an invalid type");
15445 
15446  EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
15447 
15448  SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
15449  ExtOp, DAG.getValueType(ExtVT));
15450 
15451  return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
15452  }
15453 
15454  if (DCI.isBeforeLegalizeOps())
15455  return SDValue();
15456 
15458  return SDValue();
15459 
15460  // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
15461  // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
15462  unsigned NewOpc;
15463  unsigned MemVTOpNum = 4;
15464  switch (Opc) {
15466  NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
15467  MemVTOpNum = 3;
15468  break;
15471  MemVTOpNum = 3;
15472  break;
15475  MemVTOpNum = 3;
15476  break;
15479  break;
15482  break;
15485  break;
15488  break;
15491  break;
15494  break;
15497  break;
15500  break;
15503  break;
15506  break;
15509  break;
15512  break;
15515  break;
15518  break;
15521  break;
15522  default:
15523  return SDValue();
15524  }
15525 
15526  EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
15527  EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
15528 
15529  if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
15530  return SDValue();
15531 
15532  EVT DstVT = N->getValueType(0);
15533  SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
15534 
15536  for (unsigned I = 0; I < Src->getNumOperands(); ++I)
15537  Ops.push_back(Src->getOperand(I));
15538 
15539  SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
15540  DCI.CombineTo(N, ExtLoad);
15541  DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
15542 
15543  // Return N so it doesn't get rechecked
15544  return SDValue(N, 0);
15545 }
15546 
15547 /// Legalize the gather prefetch (scalar + vector addressing mode) when the
15548 /// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
15549 /// != nxv2i32) do not need legalization.
15551  const unsigned OffsetPos = 4;
15552  SDValue Offset = N->getOperand(OffsetPos);
15553 
15554  // Not an unpacked vector, bail out.
15555  if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
15556  return SDValue();
15557 
15558  // Extend the unpacked offset vector to 64-bit lanes.
15559  SDLoc DL(N);
15560  Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
15561  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
15562  // Replace the offset operand with the 64-bit one.
15563  Ops[OffsetPos] = Offset;
15564 
15565  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
15566 }
15567 
15568 /// Combines a node carrying the intrinsic
15569 /// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
15570 /// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
15571 /// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
15572 /// sve gather prefetch instruction with vector plus immediate addressing mode.
15574  unsigned ScalarSizeInBytes) {
15575  const unsigned ImmPos = 4, OffsetPos = 3;
15576  // No need to combine the node if the immediate is valid...
15577  if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
15578  return SDValue();
15579 
15580  // ...otherwise swap the offset base with the offset...
15581  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
15582  std::swap(Ops[ImmPos], Ops[OffsetPos]);
15583  // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
15584  // `aarch64_sve_prfb_gather_uxtw_index`.
15585  SDLoc DL(N);
15586  Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
15587  MVT::i64);
15588 
15589  return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
15590 }
15591 
15592 SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
15593  DAGCombinerInfo &DCI) const {
15594  SelectionDAG &DAG = DCI.DAG;
15595  switch (N->getOpcode()) {
15596  default:
15597  LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
15598  break;
15599  case ISD::ABS:
15600  return performABSCombine(N, DAG, DCI, Subtarget);
15601  case ISD::ADD:
15602  case ISD::SUB:
15603  return performAddSubCombine(N, DCI, DAG);
15604  case ISD::XOR:
15605  return performXorCombine(N, DAG, DCI, Subtarget);
15606  case ISD::MUL:
15607  return performMulCombine(N, DAG, DCI, Subtarget);
15608  case ISD::SINT_TO_FP:
15609  case ISD::UINT_TO_FP:
15610  return performIntToFpCombine(N, DAG, Subtarget);
15611  case ISD::FP_TO_SINT:
15612  case ISD::FP_TO_UINT:
15613  return performFpToIntCombine(N, DAG, DCI, Subtarget);
15614  case ISD::FDIV:
15615  return performFDivCombine(N, DAG, DCI, Subtarget);
15616  case ISD::OR:
15617  return performORCombine(N, DCI, Subtarget);
15618  case ISD::AND:
15619  return performANDCombine(N, DCI);
15620  case ISD::SRL:
15621  return performSRLCombine(N, DCI);
15622  case ISD::INTRINSIC_WO_CHAIN:
15623  return performIntrinsicCombine(N, DCI, Subtarget);
15624  case ISD::ANY_EXTEND:
15625  case ISD::ZERO_EXTEND:
15626  case ISD::SIGN_EXTEND:
15627  return performExtendCombine(N, DCI, DAG);
15628  case ISD::SIGN_EXTEND_INREG:
15629  return performSignExtendInRegCombine(N, DCI, DAG);
15630  case ISD::TRUNCATE:
15631  return performVectorTruncateCombine(N, DCI, DAG);
15632  case ISD::CONCAT_VECTORS:
15633  return performConcatVectorsCombine(N, DCI, DAG);
15634  case ISD::SELECT:
15635  return performSelectCombine(N, DCI);
15636  case ISD::VSELECT:
15637  return performVSelectCombine(N, DCI.DAG);
15638  case ISD::LOAD:
15639  if (performTBISimplification(N->getOperand(1), DCI, DAG))
15640  return SDValue(N, 0);
15641  break;
15642  case ISD::STORE:
15643  return performSTORECombine(N, DCI, DAG, Subtarget);
15644  case ISD::MGATHER:
15645  case ISD::MSCATTER:
15646  return performMaskedGatherScatterCombine(N, DCI, DAG);
15647  case AArch64ISD::BRCOND:
15648  return performBRCONDCombine(N, DCI, DAG);
15649  case AArch64ISD::TBNZ:
15650  case AArch64ISD::TBZ:
15651  return performTBZCombine(N, DCI, DAG);
15652  case AArch64ISD::CSEL:
15653  return performCONDCombine(N, DCI, DAG, 2, 3);
15654  case AArch64ISD::DUP:
15655  return performPostLD1Combine(N, DCI, false);
15656  case AArch64ISD::NVCAST:
15657  return performNVCASTCombine(N);
15658  case AArch64ISD::UZP1:
15659  return performUzpCombine(N, DAG);
15660  case ISD::INSERT_VECTOR_ELT:
15661  return performPostLD1Combine(N, DCI, true);
15662  case ISD::EXTRACT_VECTOR_ELT:
15663  return performExtractVectorEltCombine(N, DAG);
15664  case ISD::VECREDUCE_ADD:
15665  return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
15666  case ISD::INTRINSIC_VOID:
15667  case ISD::INTRINSIC_W_CHAIN:
15668  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15669  case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
15670  return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
15671  case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
15672  return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
15673  case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
15674  return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
15675  case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
15676  return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
15677  case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
15678  case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
15679  case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
15680  case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
15681  case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
15682  case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
15683  case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
15684  case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
15685  return legalizeSVEGatherPrefetchOffsVec(N, DAG);
15686  case Intrinsic::aarch64_neon_ld2:
15687  case Intrinsic::aarch64_neon_ld3:
15688  case Intrinsic::aarch64_neon_ld4:
15689  case Intrinsic::aarch64_neon_ld1x2:
15690  case Intrinsic::aarch64_neon_ld1x3:
15691  case Intrinsic::aarch64_neon_ld1x4:
15692  case Intrinsic::aarch64_neon_ld2lane:
15693  case Intrinsic::aarch64_neon_ld3lane:
15694  case Intrinsic::aarch64_neon_ld4lane:
15695  case Intrinsic::aarch64_neon_ld2r:
15696  case Intrinsic::aarch64_neon_ld3r:
15697  case Intrinsic::aarch64_neon_ld4r:
15698  case Intrinsic::aarch64_neon_st2:
15699  case Intrinsic::aarch64_neon_st3:
15700  case Intrinsic::aarch64_neon_st4:
15701  case Intrinsic::aarch64_neon_st1x2:
15702  case Intrinsic::aarch64_neon_st1x3:
15703  case Intrinsic::aarch64_neon_st1x4:
15704  case Intrinsic::aarch64_neon_st2lane:
15705  case Intrinsic::aarch64_neon_st3lane:
15706  case Intrinsic::aarch64_neon_st4lane:
15707  return performNEONPostLDSTCombine(N, DCI, DAG);
15708  case Intrinsic::aarch64_sve_ldnt1:
15709  return performLDNT1Combine(N, DAG);
15710  case Intrinsic::aarch64_sve_ld1rq:
15711  return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
15712  case Intrinsic::aarch64_sve_ld1ro:
15713  return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
15714  case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
15716  case Intrinsic::aarch64_sve_ldnt1_gather:
15718  case Intrinsic::aarch64_sve_ldnt1_gather_index:
15719  return performGatherLoadCombine(N, DAG,
15721  case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
15723  case Intrinsic::aarch64_sve_ld1:
15725  case Intrinsic::aarch64_sve_ldnf1:
15727  case Intrinsic::aarch64_sve_ldff1:
15729  case Intrinsic::aarch64_sve_st1:
15730  return performST1Combine(N, DAG);
15731  case Intrinsic::aarch64_sve_stnt1:
15732  return performSTNT1Combine(N, DAG);
15733  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
15735  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
15737  case Intrinsic::aarch64_sve_stnt1_scatter:
15739  case Intrinsic::aarch64_sve_stnt1_scatter_index:
15741  case Intrinsic::aarch64_sve_ld1_gather:
15743  case Intrinsic::aarch64_sve_ld1_gather_index:
15744  return performGatherLoadCombine(N, DAG,
15746  case Intrinsic::aarch64_sve_ld1_gather_sxtw:
15748  /*OnlyPackedOffsets=*/false);
15749  case Intrinsic::aarch64_sve_ld1_gather_uxtw:
15751  /*OnlyPackedOffsets=*/false);
15752  case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
15753  return performGatherLoadCombine(N, DAG,
15755  /*OnlyPackedOffsets=*/false);
15756  case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
15757  return performGatherLoadCombine(N, DAG,
15759  /*OnlyPackedOffsets=*/false);
15760  case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
15762  case Intrinsic::aarch64_sve_ldff1_gather:
15764  case Intrinsic::aarch64_sve_ldff1_gather_index:
15765  return performGatherLoadCombine(N, DAG,
15767  case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
15768  return performGatherLoadCombine(N, DAG,
15770  /*OnlyPackedOffsets=*/false);
15771  case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
15772  return performGatherLoadCombine(N, DAG,
15774  /*OnlyPackedOffsets=*/false);
15775  case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
15776  return performGatherLoadCombine(N, DAG,
15778  /*OnlyPackedOffsets=*/false);
15779  case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
15780  return performGatherLoadCombine(N, DAG,
15782  /*OnlyPackedOffsets=*/false);
15783  case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
15784  return performGatherLoadCombine(N, DAG,
15786  case Intrinsic::aarch64_sve_st1_scatter:
15788  case Intrinsic::aarch64_sve_st1_scatter_index:
15790  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
15792  /*OnlyPackedOffsets=*/false);
15793  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
15795  /*OnlyPackedOffsets=*/false);
15796  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
15797  return performScatterStoreCombine(N, DAG,
15799  /*OnlyPackedOffsets=*/false);
15800  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
15801  return performScatterStoreCombine(N, DAG,
15803  /*OnlyPackedOffsets=*/false);
15804  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
15806  case Intrinsic::aarch64_sve_tuple_get: {
15807  SDLoc DL(N);
15808  SDValue Chain = N->getOperand(0);
15809  SDValue Src1 = N->getOperand(2);
15810  SDValue Idx = N->getOperand(3);
15811 
15812  uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
15813  EVT ResVT = N->getValueType(0);
15814  uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
15815  SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
15816  SDValue Val =
15817  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
15818  return DAG.getMergeValues({Val, Chain}, DL);
15819  }
15820  case Intrinsic::aarch64_sve_tuple_set: {
15821  SDLoc DL(N);
15822  SDValue Chain = N->getOperand(0);
15823  SDValue Tuple = N->getOperand(2);
15824  SDValue Idx = N->getOperand(3);
15825  SDValue Vec = N->getOperand(4);
15826 
15827  EVT TupleVT = Tuple.getValueType();
15828  uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
15829 
15830  uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
15831  uint64_t NumLanes =
15832  Vec.getValueType().getVectorElementCount().getKnownMinValue();
15833 
15834  if ((TupleLanes % NumLanes) != 0)
15835  report_fatal_error("invalid tuple vector!");
15836 
15837  uint64_t NumVecs = TupleLanes / NumLanes;
15838 
15839  SmallVector<SDValue, 4> Opnds;
15840  for (unsigned I = 0; I < NumVecs; ++I) {
15841  if (I == IdxConst)
15842  Opnds.push_back(Vec);
15843  else {
15844  SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
15845  Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
15846  Vec.getValueType(), Tuple, ExtIdx));
15847  }
15848  }
15849  SDValue Concat =
15850  DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
15851  return DAG.getMergeValues({Concat, Chain}, DL);
15852  }
15853  case Intrinsic::aarch64_sve_tuple_create2:
15854  case Intrinsic::aarch64_sve_tuple_create3:
15855  case Intrinsic::aarch64_sve_tuple_create4: {
15856  SDLoc DL(N);
15857  SDValue Chain = N->getOperand(0);
15858 
15859  SmallVector<SDValue, 4> Opnds;
15860  for (unsigned I = 2; I < N->getNumOperands(); ++I)
15861  Opnds.push_back(N->getOperand(I));
15862 
15863  EVT VT = Opnds[0].getValueType();
15864  EVT EltVT = VT.getVectorElementType();
15865  EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
15866  VT.getVectorElementCount() *
15867  (N->getNumOperands() - 2));
15868  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
15869  return DAG.getMergeValues({Concat, Chain}, DL);
15870  }
15871  case Intrinsic::aarch64_sve_ld2:
15872  case Intrinsic::aarch64_sve_ld3:
15873  case Intrinsic::aarch64_sve_ld4: {
15874  SDLoc DL(N);
15875  SDValue Chain = N->getOperand(0);
15876  SDValue Mask = N->getOperand(2);
15877  SDValue BasePtr = N->getOperand(3);
15878  SDValue LoadOps[] = {Chain, Mask, BasePtr};
15879  unsigned IntrinsicID =
15880  cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15881  SDValue Result =
15882  LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
15883  return DAG.getMergeValues({Result, Chain}, DL);
15884  }
15885  default:
15886  break;
15887  }
15888  break;
15889  case ISD::GlobalAddress:
15890  return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
15891  }
15892  return SDValue();
15893 }
15894 
15895 // Check if the return value is used only as a return value, as otherwise
15896 // we can't perform a tail-call. In particular, we need to check for
15897 // target ISD nodes that are returns and any other "odd" constructs
15898 // that the generic analysis code won't necessarily catch.
15899 bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
15900  SDValue &Chain) const {
15901  if (N->getNumValues() != 1)
15902  return false;
15903  if (!N->hasNUsesOfValue(1, 0))
15904  return false;
15905 
15906  SDValue TCChain = Chain;
15907  SDNode *Copy = *N->use_begin();
15908  if (Copy->getOpcode() == ISD::CopyToReg) {
15909  // If the copy has a glue operand, we conservatively assume it isn't safe to
15910  // perform a tail call.
15911  if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
15912  MVT::Glue)
15913  return false;
15914  TCChain = Copy->getOperand(0);
15915  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
15916  return false;
15917 
15918  bool HasRet = false;
15919  for (SDNode *Node : Copy->uses()) {
15920  if (Node->getOpcode() != AArch64ISD::RET_FLAG)
15921  return false;
15922  HasRet = true;
15923  }
15924 
15925  if (!HasRet)
15926  return false;
15927 
15928  Chain = TCChain;
15929  return true;
15930 }
15931 
15932 // Return whether an instruction can potentially be optimized to a tail
15933 // call. This will cause the optimizers to attempt to move, or duplicate,
15934 // return instructions to help enable tail call optimizations for this
15935 // instruction.
15936 bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
15937  return CI->isTailCall();
15938 }
15939 
15940 bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
15941  SDValue &Offset,
15942  ISD::MemIndexedMode &AM,
15943  bool &IsInc,
15944  SelectionDAG &DAG) const {
15945  if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
15946  return false;
15947 
15948  Base = Op->getOperand(0);
15949  // All of the indexed addressing mode instructions take a signed
15950  // 9 bit immediate offset.
15951  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
15952  int64_t RHSC = RHS->getSExtValue();
15953  if (Op->getOpcode() == ISD::SUB)
15954  RHSC = -(uint64_t)RHSC;
15955  if (!isInt<9>(RHSC))
15956  return false;
15957  IsInc = (Op->getOpcode() == ISD::ADD);
15958  Offset = Op->getOperand(1);
15959  return true;
15960  }
15961  return false;
15962 }
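// (Illustrative) For (add x, #16) this yields Base = x, Offset = #16 and
// IsInc = true; offsets outside the signed 9-bit range [-256, 255] are
// rejected and the node is not turned into a pre/post-indexed access.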
15963 
15964 bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
15965  SDValue &Offset,
15966  ISD::MemIndexedMode &AM,
15967  SelectionDAG &DAG) const {
15968  EVT VT;
15969  SDValue Ptr;
15970  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15971  VT = LD->getMemoryVT();
15972  Ptr = LD->getBasePtr();
15973  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15974  VT = ST->getMemoryVT();
15975  Ptr = ST->getBasePtr();
15976  } else
15977  return false;
15978 
15979  bool IsInc;
15980  if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
15981  return false;
15982  AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
15983  return true;
15984 }
15985 
15986 bool AArch64TargetLowering::getPostIndexedAddressParts(
15987  SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
15988  ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
15989  EVT VT;
15990  SDValue Ptr;
15991  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
15992  VT = LD->getMemoryVT();
15993  Ptr = LD->getBasePtr();
15994  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
15995  VT = ST->getMemoryVT();
15996  Ptr = ST->getBasePtr();
15997  } else
15998  return false;
15999 
16000  bool IsInc;
16001  if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
16002  return false;
16003  // Post-indexing updates the base, so it's not a valid transform
16004  // if that's not the same as the load's pointer.
16005  if (Ptr != Base)
16006  return false;
16007  AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
16008  return true;
16009 }
16010 
16011 static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
16012  SelectionDAG &DAG) {
16013  SDLoc DL(N);
16014  SDValue Op = N->getOperand(0);
16015 
16016  if (N->getValueType(0) != MVT::i16 ||
16017  (Op.getValueType() != MVT::f16 && Op.getValueType() != MVT::bf16))
16018  return;
16019 
16020  Op = SDValue(
16021  DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
16022  DAG.getUNDEF(MVT::i32), Op,
16023  DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
16024  0);
16025  Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
16026  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
16027 }
16028 
16029 static void ReplaceReductionResults(SDNode *N,
16030  SmallVectorImpl<SDValue> &Results,
16031  SelectionDAG &DAG, unsigned InterOp,
16032  unsigned AcrossOp) {
16033  EVT LoVT, HiVT;
16034  SDValue Lo, Hi;
16035  SDLoc dl(N);
16036  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
16037  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
16038  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
16039  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
16040  Results.push_back(SplitVal);
16041 }
16042 
16043 static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
16044  SDLoc DL(N);
16045  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
16046  SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
16047  DAG.getNode(ISD::SRL, DL, MVT::i128, N,
16048  DAG.getConstant(64, DL, MVT::i64)));
16049  return std::make_pair(Lo, Hi);
16050 }
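// (Illustrative) For an i128 value V, splitInt128 returns the pair
// (trunc V to i64, trunc (V lshr 64) to i64), i.e. the low and high halves.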
16051 
16052 void AArch64TargetLowering::ReplaceExtractSubVectorResults(
16053  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
16054  SDValue In = N->getOperand(0);
16055  EVT InVT = In.getValueType();
16056 
16057  // Common code will handle these just fine.
16058  if (!InVT.isScalableVector() || !InVT.isInteger())
16059  return;
16060 
16061  SDLoc DL(N);
16062  EVT VT = N->getValueType(0);
16063 
16064  // The following checks bail if this is not a halving operation.
16065 
16066  ElementCount ResEC = VT.getVectorElementCount();
16067 
16068  if (InVT.getVectorElementCount() != (ResEC * 2))
16069  return;
16070 
16071  auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
16072  if (!CIndex)
16073  return;
16074 
16075  unsigned Index = CIndex->getZExtValue();
16076  if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
16077  return;
16078 
16079  unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
16080  EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
16081 
16082  SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
16083  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
16084 }
16085 
16086 // Create an even/odd pair of X registers holding integer value V.
16087 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
16088  SDLoc dl(V.getNode());
16089  SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
16090  SDValue VHi = DAG.getAnyExtOrTrunc(
16091  DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
16092  dl, MVT::i64);
16093  if (DAG.getDataLayout().isBigEndian())
16094  std::swap (VLo, VHi);
16095  SDValue RegClass =
16096  DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
16097  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
16098  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
16099  const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
16100  return SDValue(
16101  DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
16102 }
16103 
16104 static void ReplaceCMP_SWAP_128Results(SDNode *N,
16105  SmallVectorImpl<SDValue> &Results,
16106  SelectionDAG &DAG,
16107  const AArch64Subtarget *Subtarget) {
16108  assert(N->getValueType(0) == MVT::i128 &&
16109  "AtomicCmpSwap on types less than 128 should be legal");
16110 
16111  if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
16112  // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
16113  // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
16114  SDValue Ops[] = {
16115  createGPRPairNode(DAG, N->getOperand(2)), // Compare value
16116  createGPRPairNode(DAG, N->getOperand(3)), // Store value
16117  N->getOperand(1), // Ptr
16118  N->getOperand(0), // Chain in
16119  };
16120 
16121  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
16122 
16123  unsigned Opcode;
16124  switch (MemOp->getOrdering()) {
16125  case AtomicOrdering::Monotonic:
16126  Opcode = AArch64::CASPX;
16127  break;
16128  case AtomicOrdering::Acquire:
16129  Opcode = AArch64::CASPAX;
16130  break;
16131  case AtomicOrdering::Release:
16132  Opcode = AArch64::CASPLX;
16133  break;
16134  case AtomicOrdering::AcquireRelease:
16135  case AtomicOrdering::SequentiallyConsistent:
16136  Opcode = AArch64::CASPALX;
16137  break;
16138  default:
16139  llvm_unreachable("Unexpected ordering!");
16140  }
16141 
16142  MachineSDNode *CmpSwap = DAG.getMachineNode(
16143  Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
16144  DAG.setNodeMemRefs(CmpSwap, {MemOp});
16145 
16146  unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
16147  if (DAG.getDataLayout().isBigEndian())
16148  std::swap(SubReg1, SubReg2);
16149  SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
16150  SDValue(CmpSwap, 0));
16151  SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
16152  SDValue(CmpSwap, 0));
16153  Results.push_back(
16154  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
16155  Results.push_back(SDValue(CmpSwap, 1)); // Chain out
16156  return;
16157  }
16158 
16159  auto Desired = splitInt128(N->getOperand(2), DAG);
16160  auto New = splitInt128(N->getOperand(3), DAG);
16161  SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
16162  New.first, New.second, N->getOperand(0)};
16163  SDNode *CmpSwap = DAG.getMachineNode(
16164  AArch64::CMP_SWAP_128, SDLoc(N),
16165  DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
16166 
16167  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
16168  DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
16169 
16170  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
16171  SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
16172  Results.push_back(SDValue(CmpSwap, 3));
16173 }
16174 
16175 void AArch64TargetLowering::ReplaceNodeResults(
16176  SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
16177  switch (N->getOpcode()) {
16178  default:
16179  llvm_unreachable("Don't know how to custom expand this");
16180  case ISD::BITCAST:
16181  ReplaceBITCASTResults(N, Results, DAG);
16182  return;
16183  case ISD::VECREDUCE_ADD:
16184  case ISD::VECREDUCE_SMAX:
16185  case ISD::VECREDUCE_SMIN:
16186  case ISD::VECREDUCE_UMAX:
16187  case ISD::VECREDUCE_UMIN:
16188  Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
16189  return;
16190 
16191  case ISD::CTPOP:
16192  if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
16193  Results.push_back(Result);
16194  return;
16195  case AArch64ISD::SADDV:
16196  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
16197  return;
16198  case AArch64ISD::UADDV:
16199  ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
16200  return;
16201  case AArch64ISD::SMINV:
16202  ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
16203  return;
16204  case AArch64ISD::UMINV:
16205  ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
16206  return;
16207  case AArch64ISD::SMAXV:
16208  ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
16209  return;
16210  case AArch64ISD::UMAXV:
16211  ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
16212  return;
16213  case ISD::FP_TO_UINT:
16214  case ISD::FP_TO_SINT:
16215  assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
16216  // Let normal code take care of it by not adding anything to Results.
16217  return;
16218  case ISD::ATOMIC_CMP_SWAP:
16219  ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
16220  return;
16221  case ISD::LOAD: {
16222  assert(SDValue(N, 0).getValueType() == MVT::i128 &&
16223  "unexpected load's value type");
16224  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
16225  if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
16226  // Non-volatile loads are optimized later in AArch64's load/store
16227  // optimizer.
16228  return;
16229  }
16230 
16231  SDValue Result = DAG.getMemIntrinsicNode(
16232  AArch64ISD::LDP, SDLoc(N),
16233  DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
16234  {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
16235  LoadNode->getMemOperand());
16236 
16237  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
16238  Result.getValue(0), Result.getValue(1));
16239  Results.append({Pair, Result.getValue(2) /* Chain */});
16240  return;
16241  }
16242  case ISD::EXTRACT_SUBVECTOR:
16243  ReplaceExtractSubVectorResults(N, Results, DAG);
16244  return;
16245  case ISD::INTRINSIC_WO_CHAIN: {
16246  EVT VT = N->getValueType(0);
16247  assert((VT == MVT::i8 || VT == MVT::i16) &&
16248  "custom lowering for unexpected type");
16249 
16250  ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
16251  Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
16252  switch (IntID) {
16253  default:
16254  return;
16255  case Intrinsic::aarch64_sve_clasta_n: {
16256  SDLoc DL(N);
16257  auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
16258  auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
16259  N->getOperand(1), Op2, N->getOperand(3));
16260  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
16261  return;
16262  }
16263  case Intrinsic::aarch64_sve_clastb_n: {
16264  SDLoc DL(N);
16265  auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
16266  auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
16267  N->getOperand(1), Op2, N->getOperand(3));
16268  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
16269  return;
16270  }
16271  case Intrinsic::aarch64_sve_lasta: {
16272  SDLoc DL(N);
16273  auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
16274  N->getOperand(1), N->getOperand(2));
16275  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
16276  return;
16277  }
16278  case Intrinsic::aarch64_sve_lastb: {
16279  SDLoc DL(N);
16280  auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
16281  N->getOperand(1), N->getOperand(2));
16282  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
16283  return;
16284  }
16285  }
16286  }
16287  }
16288 }
16289 
16290 bool AArch64TargetLowering::useLoadStackGuardNode() const {
16291  if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
16292  return TargetLowering::useLoadStackGuardNode();
16293  return true;
16294 }
16295 
16296 unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
16297  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
16298  // reciprocal if there are three or more FDIVs.
16299  return 3;
16300 }
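// (Illustrative) With a/d, b/d and c/d in the same block, the reciprocal
// r = 1/d is computed once and the divisions become a*r, b*r and c*r; with
// only two divisions the transform is not considered profitable.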
16301 
16302 TargetLoweringBase::LegalizeTypeAction
16303 AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
16304  // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
16305  // v4i16, v2i32 instead of to promote.
16306  if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
16307  VT == MVT::v1f32)
16308  return TypeWidenVector;
16309 
16310  return TargetLoweringBase::getPreferredVectorAction(VT);
16311 }
16312 
16313 // Loads and stores less than 128-bits are already atomic; ones above that
16314 // are doomed anyway, so defer to the default libcall and blame the OS when
16315 // things go wrong.
16316 bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
16317  unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
16318  return Size == 128;
16319 }
16320 
16321 // Loads and stores less than 128-bits are already atomic; ones above that
16322 // are doomed anyway, so defer to the default libcall and blame the OS when
16323 // things go wrong.
16324 TargetLowering::AtomicExpansionKind
16325 AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
16326  unsigned Size = LI->getType()->getPrimitiveSizeInBits();
16327  return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
16328 }
16329 
16330 // For the real atomic operations, we have ldxr/stxr up to 128 bits,
16331 TargetLowering::AtomicExpansionKind
16332 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
16333  if (AI->isFloatingPointOperation())
16334  return AtomicExpansionKind::CmpXChg;
16335 
16336  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
16337  if (Size > 128) return AtomicExpansionKind::None;
16338 
16339  // Nand is not supported in LSE.
16340  // Leave 128 bits to LLSC or CmpXChg.
16341  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
16342  if (Subtarget->hasLSE())
16343  return AtomicExpansionKind::None;
16344  if (Subtarget->outlineAtomics()) {
16345  // [U]Min/[U]Max RWM atomics are used in __sync_fetch_ libcalls so far.
16346  // Don't outline them unless
16347  // (1) high level <atomic> support approved:
16348  // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
16349  // (2) low level libgcc and compiler-rt support implemented by:
16350  // min/max outline atomics helpers
16351  if (AI->getOperation() != AtomicRMWInst::Min &&
16352  AI->getOperation() != AtomicRMWInst::Max &&
16353  AI->getOperation() != AtomicRMWInst::UMin &&
16354  AI->getOperation() != AtomicRMWInst::UMax) {
16355  return AtomicExpansionKind::None;
16356  }
16357  }
16358  }
16359 
16360  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
16361  // implement atomicrmw without spilling. If the target address is also on the
16362  // stack and close enough to the spill slot, this can lead to a situation
16363  // where the monitor always gets cleared and the atomic operation can never
16364  // succeed. So at -O0 lower this operation to a CAS loop.
16365  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
16366  return AtomicExpansionKind::CmpXChg;
16367 
16368  return AtomicExpansionKind::LLSC;
16369 }
16370 
16371 TargetLowering::AtomicExpansionKind
16372 AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
16373  AtomicCmpXchgInst *AI) const {
16374  // If subtarget has LSE, leave cmpxchg intact for codegen.
16375  if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
16376  return AtomicExpansionKind::None;
16377  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
16378  // implement cmpxchg without spilling. If the address being exchanged is also
16379  // on the stack and close enough to the spill slot, this can lead to a
16380  // situation where the monitor always gets cleared and the atomic operation
16381  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
16382  if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
16383  return AtomicExpansionKind::None;
16384  return AtomicExpansionKind::LLSC;
16385 }
16386 
16387 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
16388  AtomicOrdering Ord) const {
16389  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16390  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
16391  bool IsAcquire = isAcquireOrStronger(Ord);
16392 
16393  // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
16394  // intrinsic must return {i64, i64} and we have to recombine them into a
16395  // single i128 here.
16396  if (ValTy->getPrimitiveSizeInBits() == 128) {
16397  Intrinsic::ID Int =
16398  IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
16399  Function *Ldxr = Intrinsic::getDeclaration(M, Int);
16400 
16401  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
16402  Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
16403 
16404  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
16405  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
16406  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
16407  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
16408  return Builder.CreateOr(
16409  Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
16410  }
16411 
16412  Type *Tys[] = { Addr->getType() };
16413  Intrinsic::ID Int =
16414  IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
16415  Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
16416 
16417  Type *EltTy = cast<PointerType>(Addr->getType())->getElementType();
16418 
16419  const DataLayout &DL = M->getDataLayout();
16420  IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(EltTy));
16421  Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
16422 
16423  return Builder.CreateBitCast(Trunc, EltTy);
16424 }
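// (Illustrative) IR produced by the 128-bit path above for an acquire load:
//   %lohi = call { i64, i64 } @llvm.aarch64.ldaxp(i8* %addr)
// followed by extractvalue/zext/shl/or to reassemble the i128 result.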
16425 
16426 void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
16427  IRBuilder<> &Builder) const {
16428  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16429  Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
16430 }
16431 
16432 Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
16433  Value *Val, Value *Addr,
16434  AtomicOrdering Ord) const {
16435  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
16436  bool IsRelease = isReleaseOrStronger(Ord);
16437 
16438  // Since the intrinsics must have legal type, the i128 intrinsics take two
16439  // parameters: "i64, i64". We must marshal Val into the appropriate form
16440  // before the call.
16441  if (Val->getType()->getPrimitiveSizeInBits() == 128) {
16442  Intrinsic::ID Int =
16443  IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
16444  Function *Stxr = Intrinsic::getDeclaration(M, Int);
16445  Type *Int64Ty = Type::getInt64Ty(M->getContext());
16446 
16447  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
16448  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
16449  Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
16450  return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
16451  }
16452 
16453  Intrinsic::ID Int =
16454  IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
16455  Type *Tys[] = { Addr->getType() };
16456  Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
16457 
16458  const DataLayout &DL = M->getDataLayout();
16459  IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
16460  Val = Builder.CreateBitCast(Val, IntValTy);
16461 
16462  return Builder.CreateCall(Stxr,
16463  {Builder.CreateZExtOrBitCast(
16464  Val, Stxr->getFunctionType()->getParamType(0)),
16465  Addr});
16466 }
16467 
16468 bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
16469  Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
16470  if (Ty->isArrayTy())
16471  return true;
16472 
16473  const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
16474  if (TySize.isScalable() && TySize.getKnownMinSize() > 128)
16475  return true;
16476 
16477  return false;
16478 }
16479 
16480 bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
16481  EVT) const {
16482  return false;
16483 }
16484 
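// Helper used by the stack-guard and SafeStack hooks below: returns a pointer
// located Offset bytes past the thread pointer (TPIDR_EL0), cast to i8**.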
16485 static Value *UseTlsOffset(IRBuilder<> &IRB, unsigned Offset) {
16486  Module *M = IRB.GetInsertBlock()->getParent()->getParent();
16487  Function *ThreadPointerFunc =
16488  Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
16489  return IRB.CreatePointerCast(
16490  IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
16491  Offset),
16492  IRB.getInt8PtrTy()->getPointerTo(0));
16493 }
16494 
16495 Value *AArch64TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
16496  // Android provides a fixed TLS slot for the stack cookie. See the definition
16497  // of TLS_SLOT_STACK_GUARD in
16498  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
16499  if (Subtarget->isTargetAndroid())
16500  return UseTlsOffset(IRB, 0x28);
16501 
16502  // Fuchsia is similar.
16503  // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
16504  if (Subtarget->isTargetFuchsia())
16505  return UseTlsOffset(IRB, -0x10);
16506 
16507  return TargetLowering::getIRStackGuard(IRB);
16508 }
16509 
16510 void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
16511  // MSVC CRT provides functionality for stack protection.
16512  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
16513  // MSVC CRT has a global variable holding security cookie.
16514  M.getOrInsertGlobal("__security_cookie",
16515  Type::getInt8PtrTy(M.getContext()));
16516 
16517  // MSVC CRT has a function to validate security cookie.
16518  FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
16519  "__security_check_cookie", Type::getVoidTy(M.getContext()),
16520  Type::getInt8PtrTy(M.getContext()));
16521  if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
16522  F->setCallingConv(CallingConv::Win64);
16523  F->addAttribute(1, Attribute::AttrKind::InReg);
16524  }
16525  return;
16526  }
16527  TargetLowering::insertSSPDeclarations(M);
16528 }
16529 
16530 Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
16531  // MSVC CRT has a global variable holding security cookie.
16532  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
16533  return M.getGlobalVariable("__security_cookie");
16534  return TargetLowering::getSDagStackGuard(M);
16535 }
16536 
16537 Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
16538  // MSVC CRT has a function to validate security cookie.
16539  if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
16540  return M.getFunction("__security_check_cookie");
16541  return TargetLowering::getSSPStackGuardCheck(M);
16542 }
16543 
16544 Value *AArch64TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
16545  // Android provides a fixed TLS slot for the SafeStack pointer. See the
16546  // definition of TLS_SLOT_SAFESTACK in
16547  // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
16548  if (Subtarget->isTargetAndroid())
16549  return UseTlsOffset(IRB, 0x48);
16550 
16551  // Fuchsia is similar.
16552  // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
16553  if (Subtarget->isTargetFuchsia())
16554  return UseTlsOffset(IRB, -0x8);
16555 
16556  return TargetLowering::getSafeStackPointerLocation(IRB);
16557 }
16558 
16559 bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
16560  const Instruction &AndI) const {
16561  // Only sink 'and' mask to cmp use block if it is masking a single bit, since
16562  // this is likely to fold the and/cmp/br into a single tbz instruction. It
16563  // may be beneficial to sink in other cases, but we would have to check that
16564  // the cmp would not get folded into the br to form a cbz for these to be
16565  // beneficial.
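  // For example, '(X & 8) == 0' tests a single bit and maps onto 'tbz Xn, #3',
  // whereas a multi-bit mask would still need a separate and/tst plus a branch.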
16566  ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
16567  if (!Mask)
16568  return false;
16569  return Mask->getValue().isPowerOf2();
16570 }
16571 
16575  unsigned OldShiftOpcode, unsigned NewShiftOpcode,
16576  SelectionDAG &DAG) const {
16577  // Does baseline recommend not to perform the fold by default?
16579  X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
16580  return false;
16581  // Else, if this is a vector shift, prefer 'shl'.
16582  return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
16583 }
16584 
16585 bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
16586  SDNode *N) const {
16587  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16588  !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
16589  return false;
16590  return true;
16591 }
16592 
16593 void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
16594  // Update IsSplitCSR in AArch64FunctionInfo.
16595  AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
16596  AFI->setIsSplitCSR(true);
16597 }
16598 
16599 void AArch64TargetLowering::insertCopiesSplitCSR(
16600  MachineBasicBlock *Entry,
16601  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
16602  const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
16603  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
16604  if (!IStart)
16605  return;
16606 
16607  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
16608  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
16609  MachineBasicBlock::iterator MBBI = Entry->begin();
16610  for (const MCPhysReg *I = IStart; *I; ++I) {
16611  const TargetRegisterClass *RC = nullptr;
16612  if (AArch64::GPR64RegClass.contains(*I))
16613  RC = &AArch64::GPR64RegClass;
16614  else if (AArch64::FPR64RegClass.contains(*I))
16615  RC = &AArch64::FPR64RegClass;
16616  else
16617  llvm_unreachable("Unexpected register class in CSRsViaCopy!");
16618 
16619  Register NewVR = MRI->createVirtualRegister(RC);
16620  // Create copy from CSR to a virtual register.
16621  // FIXME: this currently does not emit CFI pseudo-instructions, it works
16622  // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
16623  // nounwind. If we want to generalize this later, we may need to emit
16624  // CFI pseudo-instructions.
16625  assert(Entry->getParent()->getFunction().hasFnAttribute(
16626  Attribute::NoUnwind) &&
16627  "Function should be nounwind in insertCopiesSplitCSR!");
16628  Entry->addLiveIn(*I);
16629  BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
16630  .addReg(*I);
16631 
16632  // Insert the copy-back instructions right before the terminator.
16633  for (auto *Exit : Exits)
16634  BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
16635  TII->get(TargetOpcode::COPY), *I)
16636  .addReg(NewVR);
16637  }
16638 }
16639 
16640 bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
16641  // Integer division on AArch64 is expensive. However, when aggressively
16642  // optimizing for code size, we prefer to use a div instruction, as it is
16643  // usually smaller than the alternative sequence.
16644  // The exception to this is vector division. Since AArch64 doesn't have vector
16645  // integer division, leaving the division as-is is a loss even in terms of
16646  // size, because it will have to be scalarized, while the alternative code
16647  // sequence can be performed in vector form.
16648  bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
16649  return OptSize && !VT.isVector();
16650 }
16651 
16652 bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
16653  // We want inc-of-add for scalars and sub-of-not for vectors.
16654  return VT.isScalarInteger();
16655 }
16656 
16657 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
16658  return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
16659 }
16660 
16661 unsigned
16662 AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
16663  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
16664  return getPointerTy(DL).getSizeInBits();
16665 
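  // AAPCS64 va_list holds three pointers (__stack, __gr_top, __vr_top) plus
  // two 32-bit offsets (__gr_offs, __vr_offs), hence the size computed below.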
16666  return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
16667 }
16668 
16669 void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
16670  MF.getFrameInfo().computeMaxCallFrameSize(MF);
16671  TargetLoweringBase::finalizeLowering(MF);
16672 }
16673 
16674 // Unlike X86, we let frame lowering assign offsets to all catch objects.
16675 bool AArch64TargetLowering::needsFixedCatchObjects() const {
16676  return false;
16677 }
16678 
16679 bool AArch64TargetLowering::shouldLocalize(
16680  const MachineInstr &MI, const TargetTransformInfo *TTI) const {
16681  switch (MI.getOpcode()) {
16682  case TargetOpcode::G_GLOBAL_VALUE: {
16683  // On Darwin, TLS global vars get selected into function calls, which
16684  // we don't want localized, as they can get moved into the middle of
16685  // another call sequence.
16686  const GlobalValue &GV = *MI.getOperand(1).getGlobal();
16687  if (GV.isThreadLocal() && Subtarget->isTargetMachO())
16688  return false;
16689  break;
16690  }
16691  // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
16692  // localizable.
16693  case AArch64::ADRP:
16694  case AArch64::G_ADD_LOW:
16695  return true;
16696  default:
16697  break;
16698  }
16699  return TargetLoweringBase::shouldLocalize(MI, TTI);
16700 }
16701 
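// Fall back to SelectionDAG for any instruction that touches an SVE scalable
// vector type, which GlobalISel does not yet handle at this point.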
16702 bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
16703  if (isa<ScalableVectorType>(Inst.getType()))
16704  return true;
16705 
16706  for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
16707  if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
16708  return true;
16709 
16710  if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
16711  if (isa<ScalableVectorType>(AI->getAllocatedType()))
16712  return true;
16713  }
16714 
16715  return false;
16716 }
16717 
16718 // Return the largest legal scalable vector type that matches VT's element type.
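// For example, a fixed v8i16 maps to nxv8i16 and v2f64 maps to nxv2f64; only
// the element type matters for choosing the container.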
16719 static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
16720  assert(VT.isFixedLengthVector() &&
16721  DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
16722  "Expected legal fixed length vector!");
16723  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
16724  default:
16725  llvm_unreachable("unexpected element type for SVE container");
16726  case MVT::i8:
16727  return EVT(MVT::nxv16i8);
16728  case MVT::i16:
16729  return EVT(MVT::nxv8i16);
16730  case MVT::i32:
16731  return EVT(MVT::nxv4i32);
16732  case MVT::i64:
16733  return EVT(MVT::nxv2i64);
16734  case MVT::f16:
16735  return EVT(MVT::nxv8f16);
16736  case MVT::f32:
16737  return EVT(MVT::nxv4f32);
16738  case MVT::f64:
16739  return EVT(MVT::nxv2f64);
16740  }
16741 }
16742 
16743 // Return a PTRUE with active lanes corresponding to the extent of VT.
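// For example, a fixed v4i32 yields a PTRUE of type nxv4i1 with pattern vl4,
// i.e. only the first four lanes of the SVE predicate are active.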
16744 static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
16745  EVT VT) {
16746  assert(VT.isFixedLengthVector() &&
16747  DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
16748  "Expected legal fixed length vector!");
16749 
16750  int PgPattern;
16751  switch (VT.getVectorNumElements()) {
16752  default:
16753  llvm_unreachable("unexpected element count for SVE predicate");
16754  case 1:
16755  PgPattern = AArch64SVEPredPattern::vl1;
16756  break;
16757  case 2:
16758  PgPattern = AArch64SVEPredPattern::vl2;
16759  break;
16760  case 4:
16761  PgPattern = AArch64SVEPredPattern::vl4;
16762  break;
16763  case 8:
16764  PgPattern = AArch64SVEPredPattern::vl8;
16765  break;
16766  case 16:
16767  PgPattern = AArch64SVEPredPattern::vl16;
16768  break;
16769  case 32:
16770  PgPattern = AArch64SVEPredPattern::vl32;
16771  break;
16772  case 64:
16773  PgPattern = AArch64SVEPredPattern::vl64;
16774  break;
16775  case 128:
16776  PgPattern = AArch64SVEPredPattern::vl128;
16777  break;
16778  case 256:
16779  PgPattern = AArch64SVEPredPattern::vl256;
16780  break;
16781  }
16782 
16783  // TODO: For vectors that are exactly getMaxSVEVectorSizeInBits big, we can
16784  // use AArch64SVEPredPattern::all, which can enable the use of unpredicated
16785  // variants of instructions when available.
16786 
16787  MVT MaskVT;
16788  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
16789  default:
16790  llvm_unreachable("unexpected element type for SVE predicate");
16791  case MVT::i8:
16792  MaskVT = MVT::nxv16i1;
16793  break;
16794  case MVT::i16:
16795  case MVT::f16:
16796  MaskVT = MVT::nxv8i1;
16797  break;
16798  case MVT::i32:
16799  case MVT::f32:
16800  MaskVT = MVT::nxv4i1;
16801  break;
16802  case MVT::i64:
16803  case MVT::f64:
16804  MaskVT = MVT::nxv2i1;
16805  break;
16806  }
16807 
16808  return DAG.getNode(AArch64ISD::PTRUE, DL, MaskVT,
16809  DAG.getTargetConstant(PgPattern, DL, MVT::i64));
16810 }
16811 
16812 static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
16813  EVT VT) {
16814  assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
16815  "Expected legal scalable vector!");
16816  auto PredTy = VT.changeVectorElementType(MVT::i1);
16817  return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
16818 }
16819 
16820 static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
16821  if (VT.isFixedLengthVector())
16822  return getPredicateForFixedLengthVector(DAG, DL, VT);
16823 
16824  return getPredicateForScalableVector(DAG, DL, VT);
16825 }
16826 
16827 // Grow V to consume an entire SVE register.
16828 static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
16829  assert(VT.isScalableVector() &&
16830  "Expected to convert into a scalable vector!");
16831  assert(V.getValueType().isFixedLengthVector() &&
16832  "Expected a fixed length vector operand!");
16833  SDLoc DL(V);
16834  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
16835  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
16836 }
16837 
16838 // Shrink V so it's just big enough to maintain a VT's worth of data.
16839 static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
16840  assert(VT.isFixedLengthVector() &&
16841  "Expected to convert into a fixed length vector!");
16842  assert(V.getValueType().isScalableVector() &&
16843  "Expected a scalable vector operand!");
16844  SDLoc DL(V);
16845  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
16846  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
16847 }
16848 
16849 // Convert all fixed length vector loads larger than NEON to masked_loads.
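// For example, a load of v16i8 becomes a masked load of nxv16i8 whose mask is
// the vl16 PTRUE from above and whose pass-through is UNDEF; the result is
// then extracted back down to v16i8.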
16850 SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
16851  SDValue Op, SelectionDAG &DAG) const {
16852  auto Load = cast<LoadSDNode>(Op);
16853 
16854  SDLoc DL(Op);
16855  EVT VT = Op.getValueType();
16856  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
16857 
16858  auto NewLoad = DAG.getMaskedLoad(
16859  ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
16860  getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
16861  Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
16862  Load->getExtensionType());
16863 
16864  auto Result = convertFromScalableVector(DAG, VT, NewLoad);
16865  SDValue MergedValues[2] = {Result, Load->getChain()};
16866  return DAG.getMergeValues(MergedValues, DL);
16867 }
16868 
16869 // Convert all fixed length vector stores larger than NEON to masked_stores.
16870 SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
16871  SDValue Op, SelectionDAG &DAG) const {
16872  auto Store = cast<StoreSDNode>(Op);
16873 
16874  SDLoc DL(Op);
16875  EVT VT = Store->getValue().getValueType();
16876  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
16877 
16878  auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
16879  return DAG.getMaskedStore(
16880  Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
16881  getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
16882  Store->getMemOperand(), Store->getAddressingMode(),
16883  Store->isTruncatingStore());
16884 }
16885 
16886 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
16887  SDValue Op, SelectionDAG &DAG) const {
16888  SDLoc dl(Op);
16889  EVT VT = Op.getValueType();
16890  EVT EltVT = VT.getVectorElementType();
16891 
16892  bool Signed = Op.getOpcode() == ISD::SDIV;
16893  unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
16894 
16895  // Scalable vector i32/i64 DIV is supported.
16896  if (EltVT == MVT::i32 || EltVT == MVT::i64)
16897  return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
16898 
16899  // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
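  // e.g. a v16i8 divide is unpacked (SUNPKLO/SUNPKHI) into two divides on
  // vectors with twice-as-wide elements, lowered recursively until the
  // elements reach i32, and the two halves are recombined below with UZP1.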
16900  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
16901  EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
16902  EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
16903  EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
16904 
16905  // Convert the operands to scalable vectors.
16906  SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
16907  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
16908 
16909  // Extend the scalable operands.
16910  unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16911  unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
16912  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
16913  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
16914  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
16915  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
16916 
16917  // Convert back to fixed vectors so the DIV can be further lowered.
16918  Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
16919  Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
16920  Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
16921  Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
16922  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
16923  Op0Lo, Op1Lo);
16924  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
16925  Op0Hi, Op1Hi);
16926 
16927  // Convert again to scalable vectors to truncate.
16928  ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
16929  ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
16930  SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
16931  ResultLo, ResultHi);
16932 
16933  return convertFromScalableVector(DAG, VT, ScalableResult);
16934 }
16935 
16936 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
16937  SDValue Op, SelectionDAG &DAG) const {
16938  EVT VT = Op.getValueType();
16939  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
16940 
16941  SDLoc DL(Op);
16942  SDValue Val = Op.getOperand(0);
16943  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
16944  Val = convertToScalableVector(DAG, ContainerVT, Val);
16945 
16946  bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
16947  unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
16948 
16949  // Repeatedly unpack Val until the result is of the desired element type.
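  // e.g. zero-extending v4i8 to v4i64 walks nxv16i8 -> nxv8i16 -> nxv4i32 ->
  // nxv2i64 using one UUNPKLO per step.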
16950  switch (ContainerVT.getSimpleVT().SimpleTy) {
16951  default:
16952  llvm_unreachable("unimplemented container type");
16953  case MVT::nxv16i8:
16954  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
16955  if (VT.getVectorElementType() == MVT::i16)
16956  break;
16957  LLVM_FALLTHROUGH;
16958  case MVT::nxv8i16:
16959  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
16960  if (VT.getVectorElementType() == MVT::i32)
16961  break;
16962  LLVM_FALLTHROUGH;
16963  case MVT::nxv4i32:
16964  Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
16965  assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
16966  break;
16967  }
16968 
16969  return convertFromScalableVector(DAG, VT, Val);
16970 }
16971 
16972 SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
16973  SDValue Op, SelectionDAG &DAG) const {
16974  EVT VT = Op.getValueType();
16975  assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
16976 
16977  SDLoc DL(Op);
16978  SDValue Val = Op.getOperand(0);
16979  EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
16980  Val = convertToScalableVector(DAG, ContainerVT, Val);
16981 
16982  // Repeatedly truncate Val until the result is of the desired element type.
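  // e.g. truncating from 64-bit to 8-bit elements performs three bitcast+UZP1
  // steps: nxv2i64 -> nxv4i32 -> nxv8i16 -> nxv16i8.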
16983  switch (ContainerVT.getSimpleVT().SimpleTy) {
16984  default:
16985  llvm_unreachable("unimplemented container type");
16986  case MVT::nxv2i64:
16987  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
16988  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
16989  if (VT.getVectorElementType() == MVT::i32)
16990  break;
16991  LLVM_FALLTHROUGH;
16992  case MVT::nxv4i32:
16993  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
16994  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
16995  if (VT.getVectorElementType() == MVT::i16)
16996  break;
16997  LLVM_FALLTHROUGH;
16998  case MVT::nxv8i16:
16999  Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
17000  Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
17001  assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
17002  break;
17003  }
17004 
17005  return convertFromScalableVector(DAG, VT, Val);
17006 }
17007 
17008 // Convert vector operation 'Op' to an equivalent predicated operation whereby
17009 // the original operation's type is used to construct a suitable predicate.
17010 // NOTE: The results for inactive lanes are undefined.
17011 SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
17012  SelectionDAG &DAG,
17013  unsigned NewOp,
17014  bool OverrideNEON) const {
17015  EVT VT = Op.getValueType();
17016  SDLoc DL(Op);
17017  auto Pg = getPredicateForVector(DAG, DL, VT);
17018 
17019  if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
17020  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
17021 
17022  // Create list of operands by converting existing ones to scalable types.
17023  SmallVector<SDValue, 4> Operands;
17024  for (const SDValue &V : Op->op_values()) {
17025  if (isa<CondCodeSDNode>(V)) {
17026  Operands.push_back(V);
17027  continue;
17028  }
17029 
17030  if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
17031  EVT VTArg = VTNode->getVT().getVectorElementType();
17032  EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
17033  Operands.push_back(DAG.getValueType(NewVTArg));
17034  continue;
17035  }
17036 
17037  assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
17038  "Only fixed length vectors are supported!");
17039  Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
17040  }
17041 
17042  if (isMergePassthruOpcode(NewOp))
17043  Operands.push_back(DAG.getUNDEF(ContainerVT));
17044 
17045  auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
17046  return convertFromScalableVector(DAG, VT, ScalableRes);
17047  }
17048 
17049  assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
17050 
17051  SmallVector<SDValue, 4> Operands;
17052  for (const SDValue &V : Op->op_values()) {
17053  assert((!V.getValueType().isVector() ||
17054  V.getValueType().isScalableVector()) &&
17055  "Only scalable vectors are supported!");
17056  Operands.push_back(V);
17057  }
17058 
17059  if (isMergePassthruOpcode(NewOp))
17060  Operands.push_back(DAG.getUNDEF(VT));
17061 
17062  return DAG.getNode(NewOp, DL, VT, Operands);
17063 }
17064 
17065 // If a fixed length vector operation has no side effects when applied to
17066 // undefined elements, we can safely use scalable vectors to perform the same
17067 // operation without needing to worry about predication.
17068 SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
17069  SelectionDAG &DAG) const {
17070  EVT VT = Op.getValueType();
17071  assert(useSVEForFixedLengthVectorVT(VT) &&
17072  "Only expected to lower fixed length vector operation!");
17073  EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
17074 
17075  // Create list of operands by converting existing ones to scalable types.
17076  SmallVector<SDValue, 4> Ops;
17077  for (const SDValue &V : Op->op_values()) {
17078  assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
17079 
17080  // Pass through non-vector operands.
17081  if (!V.getValueType().isVector()) {
17082  Ops.push_back(V);
17083  continue;
17084  }
17085 
17086  // "cast" fixed length vector to a scalable vector.
17087  assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
17088  "Only fixed length vectors are supported!");
17089  Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
17090  }
17091 
17092  auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
17093  return convertFromScalableVector(DAG, VT, ScalableRes);
17094 }
17095 
17096 SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
17097  SelectionDAG &DAG) const {
17098  SDLoc DL(ScalarOp);
17099  SDValue AccOp = ScalarOp.getOperand(0);
17100  SDValue VecOp = ScalarOp.getOperand(1);
17101  EVT SrcVT = VecOp.getValueType();
17102  EVT ResVT = SrcVT.getVectorElementType();
17103 
17104  EVT ContainerVT = SrcVT;
17105  if (SrcVT.isFixedLengthVector()) {
17106  ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
17107  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
17108  }
17109 
17110  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
17111  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
17112 
17113  // Convert operands to Scalable.
17114  AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
17115  DAG.getUNDEF(ContainerVT), AccOp, Zero);
17116 
17117  // Perform reduction.
17118  SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
17119  Pg, AccOp, VecOp);
17120 
17121  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
17122 }
17123 
17124 SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
17125  SelectionDAG &DAG) const {
17126  SDLoc DL(ReduceOp);
17127  SDValue Op = ReduceOp.getOperand(0);
17128  EVT OpVT = Op.getValueType();
17129  EVT VT = ReduceOp.getValueType();
17130 
17131  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
17132  return SDValue();
17133 
17134  SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
17135 
17136  switch (ReduceOp.getOpcode()) {
17137  default:
17138  return SDValue();
17139  case ISD::VECREDUCE_OR:
17140  return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
17141  case ISD::VECREDUCE_AND: {
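  // AND-reduce of an i1 vector is true iff no active lane is false, so invert
  // the input under Pg (XOR with the all-true predicate) and then test that no
  // active lane of the inverted value remains set.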
17142  Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
17143  return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
17144  }
17145  case ISD::VECREDUCE_XOR: {
17146  SDValue ID =
17147  DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
17148  SDValue Cntp =
17149  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
17150  return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
17151  }
17152  }
17153 
17154  return SDValue();
17155 }
17156 
17157 SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
17158  SDValue ScalarOp,
17159  SelectionDAG &DAG) const {
17160  SDLoc DL(ScalarOp);
17161  SDValue VecOp = ScalarOp.getOperand(0);
17162  EVT SrcVT = VecOp.getValueType();
17163 
17164  if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
17165  EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
17166  VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
17167  }
17168 
17169  // UADDV always returns an i64 result.
17170  EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
17171  SrcVT.getVectorElementType();
17172  EVT RdxVT = SrcVT;
17173  if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
17174  RdxVT = getPackedSVEVectorVT(ResVT);
17175 
17176  SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
17177  SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
17178  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
17179  Rdx, DAG.getConstant(0, DL, MVT::i64));
17180 
17181  // The VEC_REDUCE nodes expect an element size result.
17182  if (ResVT != ScalarOp.getValueType())
17183  Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
17184 
17185  return Res;
17186 }
17187 
17188 SDValue
17189 AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
17190  SelectionDAG &DAG) const {
17191  EVT VT = Op.getValueType();
17192  SDLoc DL(Op);
17193 
17194  EVT InVT = Op.getOperand(1).getValueType();
17195  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
17196  SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
17197  SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
17198 
17199  // Convert the mask to a predicate (NOTE: We don't need to worry about
17200  // inactive lanes since VSELECT is safe when given undefined elements).
17201  EVT MaskVT = Op.getOperand(0).getValueType();
17202  EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
17203  auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
17204  Mask = DAG.getNode(ISD::TRUNCATE, DL,
17205  MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
17206 
17207  auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
17208  Mask, Op1, Op2);
17209 
17210  return convertFromScalableVector(DAG, VT, ScalableRes);
17211 }
17212 
17213 SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
17214  SDValue Op, SelectionDAG &DAG) const {
17215  SDLoc DL(Op);
17216  EVT InVT = Op.getOperand(0).getValueType();
17217  EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
17218 
17219  assert(useSVEForFixedLengthVectorVT(InVT) &&
17220  "Only expected to lower fixed length vector operation!");
17221  assert(Op.getValueType() == InVT.changeTypeToInteger() &&
17222  "Expected integer result of the same bit length as the inputs!");
17223 
17224  // Expand floating point vector comparisons.
17225  if (InVT.isFloatingPoint())
17226  return SDValue();
17227 
17228  auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
17229  auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
17230  auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
17231 
17232  EVT CmpVT = Pg.getValueType();
17233  auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
17234  {Pg, Op1, Op2, Op.getOperand(2)});
17235 
17236  EVT PromoteVT = ContainerVT.changeTypeToInteger();
17237  auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
17238  return convertFromScalableVector(DAG, Op.getValueType(), Promote);
17239 }
17240 
17241 SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
17242  SelectionDAG &DAG) const {
17243  SDLoc DL(Op);
17244  EVT InVT = Op.getValueType();
17245  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17246  (void)TLI;
17247 
17248  assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
17249  InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
17250  "Only expect to cast between legal scalable vector types!");
17251  assert((VT.getVectorElementType() == MVT::i1) ==
17252  (InVT.getVectorElementType() == MVT::i1) &&
17253  "Cannot cast between data and predicate scalable vector types!");
17254 
17255  if (InVT == VT)
17256  return Op;
17257 
17258  if (VT.getVectorElementType() == MVT::i1)
17259  return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
17260 
17261  EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
17262  EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
17263  assert((VT == PackedVT || InVT == PackedInVT) &&
17264  "Cannot cast between unpacked scalable vector types!");
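  // The bitcast itself is only performed on packed types (those that fill a
  // whole SVE register): an unpacked input is first reinterpreted as its
  // packed container, bitcast, and then unpacked again below.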
17265 
17266  // Pack input if required.
17267  if (InVT != PackedInVT)
17268  Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
17269 
17270  Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
17271 
17272  // Unpack result if required.
17273  if (VT != PackedVT)
17274  Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
17275 
17276  return Op;
17277 }
return AArch64::GPR64RegClass contains(Reg)
unsigned const MachineRegisterInfo * MRI
static unsigned MatchRegisterName(StringRef Name)
static bool isConcatMask(ArrayRef< int > Mask, EVT VT, bool SplitLHS)
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, const SDLoc &dl, SelectionDAG &DAG)
static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS, AArch64CC::CondCode CC, bool NoNans, EVT VT, const SDLoc &dl, SelectionDAG &DAG)
cl::opt< bool > EnableAArch64ELFLocalDynamicTLSGeneration("aarch64-elf-ldtls-generation", cl::Hidden, cl::desc("Allow AArch64 Local Dynamic TLS code generation"), cl::init(false))
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static void changeVectorFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC usable with the vector...
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isSingletonEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue performCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, unsigned CCIndex, unsigned CmpIndex)
static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue NormalizeBuildVector(SDValue Op, SelectionDAG &DAG)
static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of zeros to a vector store by scalar stores of WZR/XZR.
static SDValue GenerateTBL(SDValue Op, ArrayRef< int > ShuffleMask, SelectionDAG &DAG)
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo)
Check whether or not Op is a SET_CC operation, either a generic or an AArch64 lowered one.
static bool isLegalArithImmed(uint64_t C)
static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT)
static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG)
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static bool areExtractShuffleVectors(Value *Op1, Value *Op2)
Check if both Op1 and Op2 are shufflevector extracts of either the lower or upper half of the vector ...
unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend)
static bool isREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
static SDValue performSRLCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point divide by power of two into fixed-point to floating-point conversion.
static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset, SDLoc DL, unsigned BitWidth)
static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG)
static bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType)
static SDValue performSVEAndCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performBRCONDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static MVT getSVEContainerType(EVT ContentTy)
static bool isMergePassthruOpcode(unsigned Opc)
static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG)
Calculates what the pre-extend type is, based on the extension operation node provided by Extend.
static SDValue performNEONPostLDSTCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Target-specific DAG combine function for NEON load/store intrinsics to merge base address updates.
static void ReplaceCMP_SWAP_128Results(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp, SelectionDAG &DAG)
static bool areExtractExts(Value *Ext1, Value *Ext2)
Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth of the vector elements.
static SDValue performSelectCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with the compare-mask instruct...
static bool canGuaranteeTCO(CallingConv::ID CC)
Return true if the calling convention is one that we can guarantee TCO for.
static SDValue performABSCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isUZPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes, unsigned ScalarSizeInBytes)
Check if the value of OffsetInBytes can be used as an immediate for the gather load/prefetch and scat...
static bool isUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of "vector_shuffle v,...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc, SelectionDAG &DAG)
#define LCALLNAME4(A, B)
static unsigned getDUPLANEOp(EVT EltType)
static void changeFPCCToAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
static bool isTRNMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget, const TargetMachine &TM)
static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST, EVT VT, EVT MemVT, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL, AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI)
Set the IntrinsicInfo for the aarch64_sve_st<N> intrinsics.
static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG)
static SDValue performNVCASTCombine(SDNode *N)
Get rid of unnecessary NVCASTs (that don't change the type).
static EVT getPackedSVEVectorVT(EVT VT)
static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG)
static void ReplaceReductionResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG, unsigned InterOp, unsigned AcrossOp)
static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, bool &FromHi)
An EXTR instruction is made up of two shifts, ORed together.
static bool isEquivalentMaskless(unsigned CC, unsigned width, ISD::LoadExtType ExtType, int AddConstant, int CompConstant)
static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG)
static EVT getExtensionTo64Bits(const EVT &OrigVT)
static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT, unsigned Opcode, SelectionDAG &DAG)
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N, SelectionDAG &DAG)
static void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG)
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits, APInt &UndefBits)
static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG)
static unsigned getIntrinsicID(const SDNode *N)
static bool IsSVECntIntrinsic(SDValue S)
static SDValue performANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool canEmitConjunction(const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static bool isWideDUPMask(ArrayRef< int > M, EVT VT, unsigned BlockSize, unsigned &DupLaneOp)
Check if a vector shuffle corresponds to a DUP instructions with a larger element width than the vect...
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, SDValue SplatVal, unsigned NumVecElts)
static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, bool isSigned)
static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG)
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *ST)
static cl::opt< bool > EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden, cl::desc("Combine extends of AArch64 masked " "gather intrinsics"), cl::init(true))
static SDValue performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG)
Legalize the gather prefetch (scalar + vector addressing mode) when the offset vector is an unpacked ...
static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG)
static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc)
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16)
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG)
static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, unsigned ScalarSizeInBytes)
Combines a node carrying the intrinsic aarch64_sve_prf<T>_gather_scalar_offset into a node that uses ...
static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St)
Replace a splat of a scalar to a vector store by scalar stores of the scalar value.
unsigned getSignExtendedGatherOpcode(unsigned Opcode)
static SDValue performAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG)
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt)
getVShiftImm - Check if this is a valid build_vector for the immediate operand of a vector shift oper...
static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC)
changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64 CC
static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT, unsigned &Opcode, bool IsGather, SelectionDAG &DAG)
static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, unsigned NewOpc)
static unsigned getCmpOperandFoldingProfit(SDValue Op)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static SDValue performConcatVectorsCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N, SelectionDAG &DAG)
#define MAKE_CASE(V)
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG, const EVT &OrigTy, const EVT &ExtTy, unsigned ExtOpcode)
static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG)
static std::pair< SDValue, SDValue > splitInt128(SDValue N, SelectionDAG &DAG)
static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG)
static cl::opt< bool > EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden, cl::desc("Enable AArch64 logical imm instruction " "optimization"), cl::init(true))
static SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT, int Pattern)
static bool isEXTMask(ArrayRef< int > M, EVT VT, bool &ReverseEXT, unsigned &Imm)
static SDValue tryCombineFixedPointConvert(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT)
static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG)
Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup)) making use of the vector SExt/ZE...
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
Fold a floating-point multiply by power of two into floating-point to fixed-point conversion.
static EVT getPromotedVTForPredicate(EVT VT)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
Turn vector tests of the signbit in the form of: xor (sra X, elt_size(X)-1), -1 into: cmge X,...
static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG)
static bool isAllConstantBuildVector(const SDValue &PotentialBVec, uint64_t &ConstVal)
static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG)
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG)
static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op, AArch64CC::CondCode Cond)
static bool isSetCCOrZExtSetCC(const SDValue &Op, SetCCInfoAndKind &Info)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG)
static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performPostLD1Combine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, bool IsLaneOp)
Target-specific DAG combine function for post-increment LD1 (lane) and post-increment LD1R.
static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2)
Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG)
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static PredicateConstraint parsePredicateConstraint(StringRef Constraint)
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG)
static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V)
static bool isPackedVectorType(EVT VT, SelectionDAG &DAG)
Returns true if VT's elements occupy the lowest bit positions of its associated register class withou...
static SDValue performAddSubLongCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static bool isTRN_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of "vector_shuffle v,...
static bool isZIPMask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
static SDValue performSTORECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG, const AArch64Subtarget *Subtarget)
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt)
isVShiftLImm - Check if this is a valid build_vector for the immediate operand of a vector shift left...
static SDValue performExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue performMaskedGatherScatterCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert, SelectionDAG &DAG)
static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl, SelectionDAG &DAG, SDValue Chain, bool IsSignaling)
static bool isSignExtended(SDNode *N, SelectionDAG &DAG)
static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG)
bool getGatherScatterIndexIsExtended(SDValue Index)
static bool isZeroExtended(SDNode *N, SelectionDAG &DAG)
std::pair< SDValue, uint64_t > lookThroughSignExtension(SDValue Val)
static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG, unsigned Opcode, bool OnlyPackedOffsets=true)
static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc, SelectionDAG &DAG)
static bool isCMN(SDValue Op, ISD::CondCode CC)
static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
EXTR instruction extracts a contiguous chunk of bits from two existing registers viewed as a high/low...
static bool isOperandOfVmullHighP64(Value *Op)
Check if Op could be used with vmull_high_p64 intrinsic.
static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode, SDValue Operand, SelectionDAG &DAG, int &ExtraSteps)
static bool isEssentiallyExtractHighSubvector(SDValue N)
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static unsigned getExtFactor(SDValue &V)
getExtFactor - Determine the adjustment factor for the position when generating an "extract from vect...
static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG)
static Value * UseTlsOffset(IRBuilder<> &IRB, unsigned Offset)
#define LCALLNAME5(A, B)
static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT)
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG)
static const MVT MVT_CC
Value type used for condition codes.
static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle, SelectionDAG &DAG)
Combines a dup(sext/zext) node pattern into sext/zext(dup) making use of the vector SExt/ZExt rather ...
static SDValue performTBZCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &AArch64cc, SelectionDAG &DAG, const SDLoc &dl)
static bool performTBISimplification(SDValue Addr, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
Simplify Addr given that the top byte of it is ignored by HW during address translation.
static SDValue performIntrinsicCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static bool isZIP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of "vector_shuffle v,...
static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V)
#define FALKOR_STRIDED_ACCESS_MD
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isConstant(const MachineInstr &MI)
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static const MCPhysReg GPRArgRegs[]
@ OP_VEXT3
@ OP_VTRNR
@ OP_VDUP1
@ OP_VZIPR
@ OP_VUZPR
@ OP_VREV
@ OP_VZIPL
@ OP_VTRNL
@ OP_COPY
@ OP_VEXT1
@ OP_VDUP0
@ OP_VEXT2
@ OP_VUZPL
@ OP_VDUP3
@ OP_VDUP2
Function Alias Analysis Results
assume Assume Builder
This file contains the simple types necessary to represent the attributes associated with functions a...
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
uint64_t Align
uint64_t Offset
uint64_t Addr
uint32_t Index
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1035
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define im(i)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
#define RegName(no)
lazy value info
#define F(x, y, z)
Definition: MD5.cpp:56
#define I(x, y, z)
Definition: MD5.cpp:59
mir Rename Register Operands
unsigned const TargetRegisterInfo * TRI
unsigned Reg
#define T
Module.h This file contains the declarations for the Module class.
uint64_t CallInst * C
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI Whole Quad Mode
static bool Enabled
Definition: Statistic.cpp:50
static const int BlockSize
Definition: TarWriter.cpp:33
This defines the Use class.
static constexpr int Concat[]
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
void setBytesInStackArgArea(unsigned bytes)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym)
void setArgumentStackToRestore(unsigned bytes)
void UpdateCustomCalleeSavedRegs(MachineFunction &MF) const
static bool hasSVEArgsOrReturn(const MachineFunction *MF)
unsigned getPrefLoopLogAlignment() const
unsigned getPrefFunctionLogAlignment() const
bool isMisaligned128StoreSlow() const
const AArch64InstrInfo * getInstrInfo() const override
unsigned getMaximumJumpTableSize() const
ARMProcFamilyEnum getProcFamily() const
Returns ARM processor family.
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
const AArch64RegisterInfo * getRegisterInfo() const override
const Triple & getTargetTriple() const
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
bool isCallingConvWin64(CallingConv::ID CC) const
unsigned getMinSVEVectorSizeInBits() const
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isXRegisterReserved(size_t i) const
bool requiresStrictAlign() const
bool predictableSelectIsExpensive() const
bool useSVEForFixedLengthVectors() const
bool hasCustomCallingConv() const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override
void initializeSplitCSR(MachineBasicBlock *Entry) const override
Perform necessary initialization to handle a subset of CSRs explicitly via copies.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
Return true if the given shuffle mask can be codegen'd directly, or if it should be stack expanded.
unsigned getVaListSizeInBits(const DataLayout &DL) const override
Returns the size of the platform's va_list object.
void insertCopiesSplitCSR(MachineBasicBlock *Entry, const SmallVectorImpl< MachineBasicBlock * > &Exits) const override
Insert explicit copies in entry and exit blocks.
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns true if the given (atomic) store should be expanded by the IR-level AtomicExpand pass into an...
int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
Value * emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
Provide custom lowering hooks for some operations.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
bool isIntDivCheap(EVT VT, AttributeList Attr) const override
Return true if integer divide is usually cheaper than a sequence of several shifts,...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC) const
Selects the correct CCAssignFn for a given CallingConvention value.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the ISD::SETCC ValueType.
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
This method returns a target specific FastISel object, or null if the target does not support "fast" ...
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Selects the correct CCAssignFn for a given CallingConvention value.
MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const override
This callback is used to inspect load/store instructions and add target-specific MachineMemOperand fl...
bool isLegalICmpImmediate(int64_t) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
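The equivalence quoted above is plain two's-complement arithmetic: xor x, -1 is ~x, and y - ~x = y + x + 1 = (x + 1) + y. A minimal standalone check of the identity (ordinary C++, not LLVM API; the test values are illustrative):
  #include <cassert>
  #include <cstdint>
  int main() {
    const int64_t Xs[] = {-7, 0, 1, 42};
    const int64_t Ys[] = {-3, 0, 5};
    for (int64_t X : Xs)
      for (int64_t Y : Ys)
        assert(Y - (X ^ -1) == (X + 1) + Y); // sub y,(xor x,-1) == add (add x,1),y
    return 0;
  }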
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a ldN intrinsic.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
bool fallBackToDAGISel(const Instruction &Inst) const override
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
MachineBasicBlock * EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const
Value * getIRStackGuard(IRBuilder<> &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool isZExtFree(Type *Ty1, Type *Ty2) const override
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
bool isProfitableToHoist(Instruction *I) const override
Check if it is profitable to hoist instruction in then/else to if.
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const override
Return true if it is profitable to reduce a load to a smaller type.
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a stN intrinsic.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const override
If the target has a standard location for the unsafe stack pointer, returns the address of that locat...
MachineBasicBlock * EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const
LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &FuncAttributes) const override
LLT returning variant.
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const override
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
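The transform this hook gates rests on the identity (X & (C >> Y)) == 0 iff ((X << Y) & C) == 0 for logical shifts within one register width. A brute-force check of that identity (ordinary C++, with an illustrative constant C):
  #include <cassert>
  #include <cstdint>
  int main() {
    const uint32_t C = 0x00F0u; // illustrative constant mask
    for (uint32_t X = 0; X < 64; ++X)
      for (uint32_t Y = 0; Y < 8; ++Y)
        assert(((X & (C >> Y)) == 0) == (((X << Y) & C) == 0));
    return 0;
  }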
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool needsFixedCatchObjects() const override
Used for exception handling on Win64.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
AArch64TargetLowering(const TargetMachine &TM, const AArch64Subtarget &STI)
bool isLegalAddImmediate(int64_t) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool shouldConsiderGEPOffsetSplit() const override
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool enableAggressiveFMAFusion(EVT VT) const override
Enable aggressive FMA fusion on targets that want it.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override
EVT is not used in-tree, but is used by out-of-tree targets.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace=0, unsigned Align=1, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Returns true if the target allows unaligned memory accesses of the specified type.
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override
Return true if SHIFT instructions should be expanded to SHIFT_PARTS instructions, and false if a libr...
Value * emitStoreConditional(IRBuilder<> &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
bool mergeStoresAfterLegalization(EVT VT) const override
SVE code generation for fixed length vectors does not custom lower BUILD_VECTOR.
APInt bitcastToAPInt() const
Definition: APFloat.h:1133
bool isPosZero() const
Definition: APFloat.h:1217
void dump() const
Definition: APFloat.cpp:4854
Class for arbitrary precision integers.
Definition: APInt.h:70
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:948
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
APInt sextOrSelf(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:976
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1259
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1700
unsigned logBase2() const
Definition: APInt.h:1816
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:963
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:369
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:469
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:667
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:655
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
Definition: APInt.h:1329
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1643
an instruction to allocate memory on the stack
Definition: Instructions.h:61
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:156
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:151
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:522
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:702
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:732
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:730
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:736
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:734
@ Nand
*p = ~(old & v)
Definition: Instructions.h:724
bool isFloatingPointOperation() const
Definition: Instructions.h:854
BinOp getOperation() const
Definition: Instructions.h:781
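Restated as plain scalar functions, the "*p = ..." update rules quoted for the AtomicRMWInst binops above look like this (illustrative only, not the LLVM API):
  #include <cassert>
  #include <cstdint>
  int64_t  rmwMin (int64_t  Old, int64_t  V) { return Old < V ? Old : V; } // Min: signed compare
  int64_t  rmwMax (int64_t  Old, int64_t  V) { return Old > V ? Old : V; } // Max: signed compare
  uint64_t rmwUMin(uint64_t Old, uint64_t V) { return Old < V ? Old : V; } // UMin: unsigned compare
  uint64_t rmwUMax(uint64_t Old, uint64_t V) { return Old > V ? Old : V; } // UMax: unsigned compare
  uint64_t rmwNand(uint64_t Old, uint64_t V) { return ~(Old & V); }        // Nand: *p = ~(old & v)
  int main() {
    assert(rmwMin(-1, 2) == -1);
    assert(rmwUMin(uint64_t(-1), 2) == 2);       // -1 is the largest unsigned value
    assert(rmwNand(0xFF, 0x0F) == ~uint64_t(0x0F));
    return 0;
  }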
This is an SDNode representing atomic operations.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
const BlockAddress * getBlockAddress() const
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
ConstantSDNode * getConstantSplatNode(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted constant or null if this is not a constant splat.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2,...
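A sketch of the usual splat-detection idiom built on the BuildVectorSDNode API above; the enclosing LLVM target code and DAG are assumed, so this is a fragment rather than a standalone program:
  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;
  // Returns true and fills Imm if Op is a BUILD_VECTOR splatting one constant.
  static bool getSplatImm(SDValue Op, APInt &Imm) {
    auto *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
    if (!BVN)
      return false;
    APInt SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    return BVN->isConstantSplat(Imm, SplatUndef, SplatBitSize, HasAnyUndefs) &&
           !HasAnyUndefs;
  }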
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
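A hedged sketch of how target call lowering typically drives the CCState / CCValAssign machinery listed above; CallConv, IsVarArg, Outs and DAG are assumed to come from the enclosing LowerCall, so this is a fragment, not a complete function:
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, IsVarArg));
  for (const CCValAssign &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Passed in VA.getLocReg(); VA.getLocInfo() says how the value was
      // extended/truncated to fit the location.
    } else {
      assert(VA.isMemLoc());
      // Passed on the stack at byte offset VA.getLocMemOffset().
    }
  }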
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1323
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1321
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
bool isZero() const
Return true if the value is positive or negative zero.
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:144
uint64_t getZExtValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:111
bool isBigEndian() const
Definition: DataLayout.h:241
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:500
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:835
A debug info location.
Definition: DebugLoc.h:33
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:197
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:643
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:134
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:682
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:228
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1635
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:239
arg_iterator arg_end()
Definition: Function.h:771
arg_iterator arg_begin()
Definition: Function.h:762
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:345
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:165
const GlobalValue * getGlobal() const
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:244
Type * getValueType() const
Definition: GlobalValue.h:273
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:446
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=None, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2434
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:508
PointerType * getInt8PtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer to an 8-bit integer value.
Definition: IRBuilder.h:561
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2202
Value * CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1917
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:178
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2673
This instruction inserts a single (scalar) element into a VectorType value.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:65
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
Class to represent integer types.
Definition: DerivedTypes.h:40
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
unsigned getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
static ElementCount getScalable(ScalarTy MinVal)
Definition: TypeSize.h:283
LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:357
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:295
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:293
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:272
Value * getPointerOperand()
Definition: Instructions.h:266
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
bool is128BitVector() const
Return true if this is a 128-bit vector type.
static mvt_range fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isScalableVector() const
Return true if this is a vector value type where the runtime length is machine dependent.
static mvt_range integer_valuetypes()
static mvt_range integer_fixedlen_vector_valuetypes()
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:490
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static mvt_range fp_fixedlen_vector_valuetypes()
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static mvt_range all_valuetypes()
SimpleValueType Iteration.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
bool is64BitVector() const
Return true if this is a 64-bit vector type.
static mvt_range fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setStackID(int ObjectIdx, uint8_t ID)
bool hasMustTailInVarArgFunc() const
Returns true if the function is variadic and contains a musttail call.
void setReturnAddressIsTaken(bool s)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Function & getFunction()
Return the LLVM function that this machine code represents.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:64
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
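The Flags values above are bitmask enumerators, so a hook such as getTargetMMOFlags simply ORs extra bits onto the defaults; a hedged fragment (IsNonTemporalHint is a hypothetical predicate, for illustration only):
  MachineMemOperand::Flags F =
      MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
  if (IsNonTemporalHint)            // hypothetical predicate
    F |= MachineMemOperand::MONonTemporal;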
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This class is used to represent an MGATHER node.
const SDValue & getPassThru() const
ISD::LoadExtType getExtensionType() const
This is a base class used to represent MGATHER and MSCATTER nodes.
const SDValue & getScale() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getMask() const
This class is used to represent an MSCATTER node.
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
bool isVolatile() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getBasePtr() const
bool isNonTemporal() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
void dump() const
Definition: Pass.cpp:131
Class to represent pointers.
Definition: DerivedTypes.h:658
Type * getElementType() const
Definition: DerivedTypes.h:677
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
unsigned getNumOperands() const
Return the number of values used by this operation.
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< use_iterator > uses()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool isUndef() const
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:223
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:690
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm)
Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
Definition: SelectionDAG.h:955
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:706
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, ISD::LoadExtType ExtTy)
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:700
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:950
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:934
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:797
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO, ISD::MemIndexType IndexType, bool IsTruncating=false)
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:449
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:922
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
SDValue getExternalSymbol(const char *Sym, EVT VT)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:447
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:444
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:742
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:450
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:644
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:768
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:814
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVMContext * getContext() const
Definition: SelectionDAG.h:454
void addCallSiteInfo(const SDNode *CallNode, CallSiteInfoImpl &&CallInfo)
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:523
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:448
SDValue getSplatValue(SDValue V)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
bool isZeroEltSplat() const
Return true if all elements of this shuffle are the same value as the first element of exactly one so...
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:442
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:73
size_t size() const
Definition: SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:558
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773
void push_back(const T &Elt)
Definition: SmallVector.h:404
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:272
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1169
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:130
An instruction for storing to memory.
Definition: Instructions.h:303
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:511
LLVM_NODISCARD StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:713
LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:160
Class to represent struct types.
Definition: DerivedTypes.h:212
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:366
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual Value * getSafeStackPointerLocation(IRBuilder<> &IRB) const
Returns the target-specific address of the unsafe stack pointer.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual Value * getIRStackGuard(IRBuilder<> &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const
Check whether or not MI needs to be moved close to its uses.
void setMaximumJumpTableSize(unsigned)
Indicate the maximum number of entries in jump tables.
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
unsigned MaxGluedStoresPerMemcpy
Specify max number of store instructions to glue in inlined memcpy.
void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
Convenience method to set an operation to Promote and specify the type in a single call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
unsigned getMaximumJumpTableSize() const
Return upper limit for number of entries in a jump table.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT) const
Return true if it is profitable to reduce a load to a smaller type.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, unsigned OldShiftOpcode, unsigned NewShiftOpcode, SelectionDAG &DAG) const
Given the pattern (X & (C l>>/<< Y)) ==/!= 0 return true if it should be transformed into: ((X <</l>>...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
MVT getFrameIndexTy(const DataLayout &DL) const
Return the type for frame index, which is determined by the alloca address space specified through th...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
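A hedged sketch of how a target constructor typically uses the configuration hooks listed above; the register classes, opcodes and actions chosen here are illustrative, not a quote of the in-tree AArch64 setup:
  addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
  addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
  computeRegisterProperties(Subtarget->getRegisterInfo());
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);         // lower via vector CNT
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);          // no truncating FP store
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTargetDAGCombine(ISD::ADD);                            // ask for ADD combines
  setMaximumJumpTableSize(4096);                            // illustrative cap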
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
TargetOptions Options
const Triple & getTargetTriple() const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned EmitCallSiteInfo
The flag enables call site info production.
unsigned TLSSize
Bit size of immediate TLS offsets (0 == use the default).
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:582
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:555
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
ScalarTy getFixedSize() const
Definition: TypeSize.h:421
ScalarTy getKnownMinSize() const
Definition: TypeSize.h:422
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:226
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
static Type * getVoidTy(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:313
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
@ FloatTyID
32-bit floating point type
Definition: Type.h:59
@ DoubleTyID
64-bit floating point type
Definition: Type.h:60
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:272
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
ScalarTy getValue() const
Definition: TypeSize.h:228
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
This class is used to represent EVT's, which are used to parameterize some operations.
LLVM Value Representation.
Definition: Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
Type * getElementType() const
Definition: DerivedTypes.h:433
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:81
#define UINT64_MAX
Definition: DataTypes.h:77
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
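A hedged sketch of how these two condition-code helpers relate, assuming the AArch64CC namespace from AArch64BaseInfo.h where the declarations above live (the wrapper function is illustrative only).

static void condCodeSketch() {
  llvm::AArch64CC::CondCode CC  = llvm::AArch64CC::EQ;
  llvm::AArch64CC::CondCode Inv = llvm::AArch64CC::getInvertedCondCode(CC); // AArch64CC::NE
  unsigned NZCV = llvm::AArch64CC::getNZCVToSatisfyCondCode(Inv);           // a flag setting under which NE holds
  (void)NZCV;
}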
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
@ NVCAST
Natural vector cast.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
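A sketch of the usual check-then-encode pattern for logical immediates, assuming the AArch64_AM namespace from MCTargetDesc/AArch64AddressingModes.h where these helpers are declared (the wrapper function is illustrative only).

static void logicalImmSketch() {
  uint64_t Imm = 0x00FF00FF00FF00FFULL;   // a repeating bit pattern
  if (llvm::AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
    uint64_t Enc = llvm::AArch64_AM::encodeLogicalImmediate(Imm, 64);
    // Enc is the N:immr:imms field value used by the AND/ORR/EOR (immediate) forms.
    (void)Enc;
  }
}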
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
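A sketch of how the FP-immediate checks are typically used; a return value of -1 means the constant cannot be materialized as an FMOV immediate. The AArch64_AM namespace and APFloat (llvm/ADT/APFloat.h) are assumptions for the example, and the wrapper function is illustrative only.

static void fpImmSketch() {
  llvm::APFloat Val(0.5f);
  int Enc = llvm::AArch64_AM::getFP32Imm(Val.bitcastToAPInt());
  if (Enc >= 0) {
    // 0.5f fits the 8-bit FMOV immediate format, so Enc holds its encoding.
  }
}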
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType1(uint64_t Imm)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
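A sketch of querying the immediate expander, assuming the AArch64_IMM namespace from AArch64ExpandImm.h and SmallVector from llvm/ADT/SmallVector.h; ImmInsnModel describes one move-immediate step (the wrapper function is illustrative only).

static void movImmSketch() {
  llvm::SmallVector<llvm::AArch64_IMM::ImmInsnModel, 4> Insn;
  llvm::AArch64_IMM::expandMOVImm(0x0000FFFF0000FFFFULL, /*BitSize=*/64, Insn);
  // Insn now lists the MOVZ/MOVN/MOVK (or ORR-immediate) steps chosen to
  // materialize the constant.
}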
static constexpr unsigned SVEBitsPerBlock
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Key
PAL metadata keys.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.

Definition: CallingConv.h:24
@ AArch64_SVE_VectorCall
Calling convention between AArch64 SVE functions.
Definition: CallingConv.h:237
@ CFGuard_Check
Special calling convention on Windows for calling the Control Flow Guard check ICall function.
Definition: CallingConv.h:87
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:164
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:651
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:229
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:954
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:950
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:456
@ FLT_ROUNDS_
FLT_ROUNDS_ - Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest, 2 Round to ...
Definition: ISDOpcodes.h:772
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
Definition: ISDOpcodes.h:1149
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1176
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:243
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:527
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:615
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:983
@ ConstantFP
Definition: ISDOpcodes.h:70
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:262
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:232
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:863
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:681
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:192
@ RETURNADDR
Definition: ISDOpcodes.h:88
@ GlobalAddress
Definition: ISDOpcodes.h:71
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:688
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:513
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1165
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:371
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:589
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:248
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1175
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:457
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1081
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:790
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:222
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1082
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1186
@ GlobalTLSAddress
Definition: ISDOpcodes.h:72
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:675
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:430
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:558
@ ADDROFRETURNADDR
ADDROFRETURNADDR - Represents the llvm.addressofreturnaddress intrinsic.
Definition: ISDOpcodes.h:94
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:879
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1162
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1042
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1166
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:857
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:808
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:905
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:307
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:884
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:888
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:329
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:628
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1177
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:215
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:565
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:979
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:303
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1170
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:570
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:606
@ ATOMIC_LOAD_CLR
Definition: ISDOpcodes.h:1080
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:550
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1079
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:541
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:505
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:196
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:678
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1033
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:643
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
Definition: ISDOpcodes.h:1139
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1062
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:840
@ UBSANTRAP
UBSANTRAP - Trap with an immediate describing the kind of sanitizer failure.
Definition: ISDOpcodes.h:1036
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:311
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:873
@ ConstantPool
Definition: ISDOpcodes.h:75
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:696
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:575
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:775
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:637
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:429
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1178
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:87
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1077
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:423
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:445
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:422
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:853
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1078
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:734
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1010
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:151
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:581
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1030
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:177
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:272
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:494
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1076
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:763
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:99
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:684
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:974
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:898
@ BlockAddress
Definition: ISDOpcodes.h:77
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:664
@ CATCHRET
CATCHRET - Represents a return from a catch block funclet.
Definition: ISDOpcodes.h:941
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:59
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:470
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:320
@ AssertZext
Definition: ISDOpcodes.h:60
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:185
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:485
bool isOverflowIntrOpRes(SDValue Op)
Returns true if the specified value is the overflow result from one of the overflow intrinsic nodes.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
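A brief sketch of these two CondCode transforms; the concrete results in the comments follow from the definitions above (the wrapper function is illustrative only).

static void setCCSketch() {
  llvm::ISD::CondCode CC   = llvm::ISD::SETLT;
  llvm::ISD::CondCode Inv  = llvm::ISD::getSetCCInverse(CC, llvm::MVT::i64); // SETGE: !(X < Y) is X >= Y
  llvm::ISD::CondCode Swap = llvm::ISD::getSetCCSwappedOperands(CC);         // SETGT: (X < Y) is (Y > X)
  (void)Inv; (void)Swap;
}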
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
Definition: ISDOpcodes.h:1253
@ SIGNED_UNSCALED
Definition: ISDOpcodes.h:1255
@ UNSIGNED_SCALED
Definition: ISDOpcodes.h:1256
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1241
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1292
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1272
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1331
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1243
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1252
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:147
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:101
class_match< UndefValue > m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:92
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
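A small sketch of these IR-level matchers from llvm/IR/PatternMatch.h; the value V being inspected is a parameter here purely for illustration.

static void patternMatchSketch(llvm::Value *V) {
  using namespace llvm::PatternMatch;
  llvm::Value *Src, *V1, *V2;
  if (match(V, m_ZExtOrSExt(m_Value(Src)))) {
    // V is a zext or sext; Src is the narrower value being extended.
  }
  if (match(V, m_Shuffle(m_Value(V1), m_Value(V2)))) {
    // V is a shufflevector of V1 and V2, regardless of its mask.
  }
}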
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Undef
Value of the register doesn't matter.
@ GeneralDynamic
Definition: CodeGen.h:43
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
CodeModel::Model getCodeModel()
constexpr double e
Definition: MathExtras.h:58
DiagnosticInfoOptimizationBase::Argument NV
@ FalseVal
Definition: TGLexer.h:61
static bool isInteger(StringRef Val)
Definition: ELFYAML.cpp:1304
This class represents lattice values for constants.
Definition: AllocatorList.h:23
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:458
bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1518
bool CC_AArch64_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
STATISTIC(NumFunctions, "Total number of functions")
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:2035
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:456
bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:19
bool CC_AArch64_Win64_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:497
constexpr bool isUInt< 32 >(uint64_t x)
Definition: MathExtras.h:412
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1332
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:603
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:339
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition: MathExtras.h:486
unsigned M1(unsigned Val)
Definition: VE.h:372
bool isReleaseOrStronger(AtomicOrdering AO)
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:597
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:226
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1341
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:157
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:474
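A quick sketch of the bit-pattern predicates above (llvm/Support/MathExtras.h); the commented results are worked examples, and the wrapper function is illustrative only.

static void bitPredicateSketch() {
  bool P2 = llvm::isPowerOf2_64(0x8000);      // true:  exactly one bit set
  bool SM = llvm::isShiftedMask_64(0xFF00);   // true:  contiguous ones, shifted up
  bool M  = llvm::isMask_64(0x00FF);          // true:  contiguous ones starting at bit 0
  unsigned L = llvm::Log2_64(0xFF00);         // 15:    index of the highest set bit
  (void)P2; (void)SM; (void)M; (void)L;
}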
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
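A heavily abbreviated sketch of how a CCAssignFn such as CC_AArch64_AAPCS is consumed during call lowering; the calling convention, vararg flag, MachineFunction, LLVMContext and outgoing-argument list are taken as parameters here because they would normally already be in scope inside LowerCall (the wrapper function is illustrative only).

static void ccAssignSketch(llvm::CallingConv::ID CallConv, bool IsVarArg,
                           llvm::MachineFunction &MF, llvm::LLVMContext &Ctx,
                           const llvm::SmallVectorImpl<llvm::ISD::OutputArg> &Outs) {
  llvm::SmallVector<llvm::CCValAssign, 16> ArgLocs;
  llvm::CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  CCInfo.AnalyzeCallOperands(Outs, llvm::CC_AArch64_AAPCS);
  // Each ArgLocs entry now records whether that argument was assigned a
  // register or a stack offset.
}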
CombineLevel
Definition: DAGCombine.h:15
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:461
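A worked sketch of the dynamic bit-width checks (isUIntN appears earlier in this list), of the kind used when testing whether an offset fits an instruction's immediate field; the wrapper function is illustrative only.

static void bitWidthSketch() {
  bool A = llvm::isUIntN(12, 4095);   // true:  4095 fits an unsigned 12-bit field
  bool B = llvm::isIntN(9, -257);     // false: the signed 9-bit range is [-256, 255]
  (void)A; (void)B;
}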
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:158
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:673
bool isAsynchronousEHPersonality(EHPersonality Pers)
Returns true if this personality function catches asynchronous exceptions.
bool isAcquireOrStronger(AtomicOrdering AO)
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1556
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1525
gep_type_iterator gep_type_begin(const User *GEP)
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1667
bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
static const MachineMemOperand::Flags MOStridedAccess
bool CC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:177
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
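A one-line sketch of the helper above; the resulting mask is spelled out in the comment.

llvm::SmallVector<int, 16> Mask =
    llvm::createSequentialMask(/*Start=*/2, /*NumInts=*/4, /*NumUndefs=*/2);
// Mask is {2, 3, 4, 5, -1, -1}; the -1 lanes are undef.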
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: BitVector.h:941
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:944
#define N
Helper structure to keep track of a SET_CC lowered into AArch64 code.
AArch64CC::CondCode CC
Helper structure to keep track of ISD::SET_CC operands.
Helper structure to be able to read SetCC information.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:355
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:121
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:246
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:262
bool isFloatingPoint() const
Return true if this is an FP or a vector FP type.
Definition: ValueTypes.h:131
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:315
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:417
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:333
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:324
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:345
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:424
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:278
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:177
EVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition: ValueTypes.h:111
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:341
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition: ValueTypes.h:398
bool isFixedLengthVector() const
Definition: ValueTypes.h:156
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:149
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:146
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:285
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:178
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition: ValueTypes.h:152
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:290
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:141
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:407
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:136
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:172
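A short sketch of typical EVT manipulations from this list; the LLVMContext is taken as a parameter because getHalfNumVectorElementsVT needs one, and the wrapper function is illustrative only.

static void evtSketch(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::MVT::v4f32;
  llvm::EVT IntVT  = VT.changeVectorElementTypeToInteger();  // v4i32
  bool Is128       = VT.is128BitVector();                    // true
  llvm::EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx);     // v2f32
  (void)IntVT; (void)Is128; (void)HalfVT;
}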
Describes a register that needs to be forwarded from the prologue to a musttail call.
bool isInConsecutiveRegs() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS)
Compute known bits common to LHS and RHS.
Definition: KnownBits.h:284
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
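A small sketch of combining known-bits information in the style of the demanded-bits combines referenced above (llvm/Support/KnownBits.h); the wrapper function is illustrative only.

static void knownBitsSketch() {
  llvm::KnownBits LHS(32), RHS(32);
  LHS.Zero.setHighBits(16);   // LHS: top 16 bits known to be zero
  RHS.Zero.setHighBits(24);   // RHS: top 24 bits known to be zero
  llvm::KnownBits Common = llvm::KnownBits::commonBits(LHS, RHS);
  unsigned BW = Common.getBitWidth();   // 32; only the top 16 bits remain known zero
  (void)BW;
}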
Structure used to represent pair of argument number after call lowering and register used to transfer...
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op is the value the constraint a...
Definition: PredicateInfo.h:75
These are IR-level optimization flags that may be propagated to SDNodes.
void setAllowReassociation(bool b)
void setNoUnsignedWrap(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This structure contains all information that is necessary for lowering calls.
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)
Helper structure to keep track of SetCC information.
GenericSetCCInfo Generic
AArch64SetCCInfo AArch64